{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Happy Customer Bank——目标客户识别\n",
    "\n",
    "    这是金融行业的任务，预测Happy Customer Bank对客户发放贷款的概率，来自于Data Hackathon 3.x。\n",
    "\n",
    "- 问题描述：https://discuss.analyticsvidhya.com/t/hackathon-3-x-predict-customer-worth-for-happy-customer-bank/3802\n",
    "\n",
    "- 该问题的优胜解决方案：\n",
    "                 https://medium.com/data-science-analytics/analytics-vidhya-3-x-hackathon-9f2550b47be6\n",
    "                 https://github.com/binga/AnalyticsVidhya_3.X_Hackathon\n",
    "\n",
    "  \n",
    "## 1、数据探索\n",
    "\n",
    "### （1）导入相关包和数据"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>ID</th>\n",
       "      <th>Gender</th>\n",
       "      <th>City</th>\n",
       "      <th>Monthly_Income</th>\n",
       "      <th>DOB</th>\n",
       "      <th>Lead_Creation_Date</th>\n",
       "      <th>Loan_Amount_Applied</th>\n",
       "      <th>Loan_Tenure_Applied</th>\n",
       "      <th>Existing_EMI</th>\n",
       "      <th>Employer_Name</th>\n",
       "      <th>...</th>\n",
       "      <th>Interest_Rate</th>\n",
       "      <th>Processing_Fee</th>\n",
       "      <th>EMI_Loan_Submitted</th>\n",
       "      <th>Filled_Form</th>\n",
       "      <th>Device_Type</th>\n",
       "      <th>Var2</th>\n",
       "      <th>Source</th>\n",
       "      <th>Var4</th>\n",
       "      <th>LoggedIn</th>\n",
       "      <th>Disbursed</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>ID000002C20</td>\n",
       "      <td>Female</td>\n",
       "      <td>Delhi</td>\n",
       "      <td>20000</td>\n",
       "      <td>23-May-78</td>\n",
       "      <td>15-May-15</td>\n",
       "      <td>300000.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>CYBOSOL</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>G</td>\n",
       "      <td>S122</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>ID000004E40</td>\n",
       "      <td>Male</td>\n",
       "      <td>Mumbai</td>\n",
       "      <td>35000</td>\n",
       "      <td>7-Oct-85</td>\n",
       "      <td>4-May-15</td>\n",
       "      <td>200000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>TATA CONSULTANCY SERVICES LTD (TCS)</td>\n",
       "      <td>...</td>\n",
       "      <td>13.25</td>\n",
       "      <td>NaN</td>\n",
       "      <td>6762.9</td>\n",
       "      <td>N</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>G</td>\n",
       "      <td>S122</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>ID000007H20</td>\n",
       "      <td>Male</td>\n",
       "      <td>Panchkula</td>\n",
       "      <td>22500</td>\n",
       "      <td>10-Oct-81</td>\n",
       "      <td>19-May-15</td>\n",
       "      <td>600000.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>ALCHEMIST HOSPITALS LTD</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>B</td>\n",
       "      <td>S143</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>ID000008I30</td>\n",
       "      <td>Male</td>\n",
       "      <td>Saharsa</td>\n",
       "      <td>35000</td>\n",
       "      <td>30-Nov-87</td>\n",
       "      <td>9-May-15</td>\n",
       "      <td>1000000.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>BIHAR GOVERNMENT</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>B</td>\n",
       "      <td>S143</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>ID000009J40</td>\n",
       "      <td>Male</td>\n",
       "      <td>Bengaluru</td>\n",
       "      <td>100000</td>\n",
       "      <td>17-Feb-84</td>\n",
       "      <td>20-May-15</td>\n",
       "      <td>500000.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>25000.0</td>\n",
       "      <td>GLOBAL EDGE SOFTWARE</td>\n",
       "      <td>...</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>N</td>\n",
       "      <td>Web-browser</td>\n",
       "      <td>B</td>\n",
       "      <td>S134</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 26 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "            ID  Gender       City  Monthly_Income        DOB  \\\n",
       "0  ID000002C20  Female      Delhi           20000  23-May-78   \n",
       "1  ID000004E40    Male     Mumbai           35000   7-Oct-85   \n",
       "2  ID000007H20    Male  Panchkula           22500  10-Oct-81   \n",
       "3  ID000008I30    Male    Saharsa           35000  30-Nov-87   \n",
       "4  ID000009J40    Male  Bengaluru          100000  17-Feb-84   \n",
       "\n",
       "  Lead_Creation_Date  Loan_Amount_Applied  Loan_Tenure_Applied  Existing_EMI  \\\n",
       "0          15-May-15             300000.0                  5.0           0.0   \n",
       "1           4-May-15             200000.0                  2.0           0.0   \n",
       "2          19-May-15             600000.0                  4.0           0.0   \n",
       "3           9-May-15            1000000.0                  5.0           0.0   \n",
       "4          20-May-15             500000.0                  2.0       25000.0   \n",
       "\n",
       "                         Employer_Name  ... Interest_Rate Processing_Fee  \\\n",
       "0                              CYBOSOL  ...           NaN            NaN   \n",
       "1  TATA CONSULTANCY SERVICES LTD (TCS)  ...         13.25            NaN   \n",
       "2              ALCHEMIST HOSPITALS LTD  ...           NaN            NaN   \n",
       "3                     BIHAR GOVERNMENT  ...           NaN            NaN   \n",
       "4                 GLOBAL EDGE SOFTWARE  ...           NaN            NaN   \n",
       "\n",
       "   EMI_Loan_Submitted Filled_Form  Device_Type  Var2  Source  Var4  LoggedIn  \\\n",
       "0                 NaN           N  Web-browser     G    S122     1         0   \n",
       "1              6762.9           N  Web-browser     G    S122     3         0   \n",
       "2                 NaN           N  Web-browser     B    S143     1         0   \n",
       "3                 NaN           N  Web-browser     B    S143     3         0   \n",
       "4                 NaN           N  Web-browser     B    S134     3         1   \n",
       "\n",
       "  Disbursed  \n",
       "0         0  \n",
       "1         0  \n",
       "2         0  \n",
       "3         0  \n",
       "4         0  \n",
       "\n",
       "[5 rows x 26 columns]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the training and test sets from CSV, then preview the first training rows\n",
    "train = pd.read_csv('HappyBank/data/Train.csv')\n",
    "test = pd.read_csv('HappyBank/data/Test.csv')\n",
    "\n",
    "train.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （2）数据基本信息\n",
    "\n",
    "    1. 样本数与特征维数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Train dataset dimensions: (87020, 26)\n",
      "Test dataset dimensions: (37717, 24)\n"
     ]
    }
   ],
   "source": [
    "# Report the (rows, columns) shape of each split\n",
    "print(\"Train dataset dimensions:\", train.shape)\n",
    "print(\"Test dataset dimensions:\", test.shape)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "    2. 特征字段"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "<class 'pandas.core.frame.DataFrame'>\n",
      "RangeIndex: 87020 entries, 0 to 87019\n",
      "Data columns (total 26 columns):\n",
      "ID                       87020 non-null object\n",
      "Gender                   87020 non-null object\n",
      "City                     86017 non-null object\n",
      "Monthly_Income           87020 non-null int64\n",
      "DOB                      87020 non-null object\n",
      "Lead_Creation_Date       87020 non-null object\n",
      "Loan_Amount_Applied      86949 non-null float64\n",
      "Loan_Tenure_Applied      86949 non-null float64\n",
      "Existing_EMI             86949 non-null float64\n",
      "Employer_Name            86949 non-null object\n",
      "Salary_Account           75256 non-null object\n",
      "Mobile_Verified          87020 non-null object\n",
      "Var5                     87020 non-null int64\n",
      "Var1                     87020 non-null object\n",
      "Loan_Amount_Submitted    52407 non-null float64\n",
      "Loan_Tenure_Submitted    52407 non-null float64\n",
      "Interest_Rate            27726 non-null float64\n",
      "Processing_Fee           27420 non-null float64\n",
      "EMI_Loan_Submitted       27726 non-null float64\n",
      "Filled_Form              87020 non-null object\n",
      "Device_Type              87020 non-null object\n",
      "Var2                     87020 non-null object\n",
      "Source                   87020 non-null object\n",
      "Var4                     87020 non-null int64\n",
      "LoggedIn                 87020 non-null int64\n",
      "Disbursed                87020 non-null int64\n",
      "dtypes: float64(8), int64(5), object(13)\n",
      "memory usage: 17.3+ MB\n"
     ]
    }
   ],
   "source": [
    "train.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "字段说明：\n",
    "数据集共26个字段: 其中1-24列为输入特征，25-26列为输出特征。 \n",
    "1. ID - 唯一ID（不能用于预测） \n",
    "2. Gender - 性别 \n",
    "3. City - 城市 \n",
    "4. Monthly_Income - 月收入（以卢比为单位） \n",
    "5. DOB - 出生日期 \n",
    "6. Lead_Creation_Date - 潜在客户（Lead，贷款线索）创建日期 \n",
    "7. Loan_Amount_Applied - 贷款申请请求金额（印度卢比，INR） \n",
    "8. Loan_Tenure_Applied - 贷款申请期限（单位为年） \n",
    "9. Existing_EMI - 现有贷款的EMI（EMI：Equated Monthly Installment，等额月供，即每月还款额） \n",
    "10. Employer_Name - 雇主名称 \n",
    "11. Salary_Account - 薪资帐户银行 \n",
    "12. Mobile_Verified - 是否移动验证（Y / N） \n",
    "13. Var5 - 连续型变量 \n",
    "14. Var1 - 类别型变量 \n",
    "15. Loan_Amount_Submitted - 提交的贷款金额（在看到资格后修改和选择） \n",
    "16. Loan_Tenure_Submitted - 提交的贷款期限（单位为年，在看到资格后修改和选择） \n",
    "17. Interest_Rate - 提交贷款金额的利率 \n",
    "18. Processing_Fee - 提交贷款的处理费（INR） \n",
    "19. EMI_Loan_Submitted - 提交贷款对应的EMI，即每月还款额（INR） \n",
    "20. Filled_Form - 后期报价后是否已填写申请表格 \n",
    "21. Device_Type - 进行申请的设备（浏览器/移动设备） \n",
    "22. Var2 - 类别型变量 \n",
    "23. Source - 类别型变量 \n",
    "24. Var4 - 类别型变量 \n",
    "\n",
    "输出： \n",
    "25. LoggedIn - 是否login（只用于理解问题的变量，不能用于预测，测试集中没有） \n",
    "26. Disbursed - 是否发放贷款（目标变量），1为发放贷款（目标客户） "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "    3. 特征分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Monthly_Income</th>\n",
       "      <th>Loan_Amount_Applied</th>\n",
       "      <th>Loan_Tenure_Applied</th>\n",
       "      <th>Existing_EMI</th>\n",
       "      <th>Var5</th>\n",
       "      <th>Loan_Amount_Submitted</th>\n",
       "      <th>Loan_Tenure_Submitted</th>\n",
       "      <th>Interest_Rate</th>\n",
       "      <th>Processing_Fee</th>\n",
       "      <th>EMI_Loan_Submitted</th>\n",
       "      <th>Var4</th>\n",
       "      <th>LoggedIn</th>\n",
       "      <th>Disbursed</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>count</th>\n",
       "      <td>8.702000e+04</td>\n",
       "      <td>8.694900e+04</td>\n",
       "      <td>86949.000000</td>\n",
       "      <td>8.694900e+04</td>\n",
       "      <td>87020.000000</td>\n",
       "      <td>5.240700e+04</td>\n",
       "      <td>52407.000000</td>\n",
       "      <td>27726.000000</td>\n",
       "      <td>27420.000000</td>\n",
       "      <td>27726.000000</td>\n",
       "      <td>87020.000000</td>\n",
       "      <td>87020.000000</td>\n",
       "      <td>87020.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>mean</th>\n",
       "      <td>5.884997e+04</td>\n",
       "      <td>2.302507e+05</td>\n",
       "      <td>2.131399</td>\n",
       "      <td>3.696228e+03</td>\n",
       "      <td>4.961503</td>\n",
       "      <td>3.950106e+05</td>\n",
       "      <td>3.891369</td>\n",
       "      <td>19.197474</td>\n",
       "      <td>5131.150839</td>\n",
       "      <td>10999.528377</td>\n",
       "      <td>2.949805</td>\n",
       "      <td>0.029350</td>\n",
       "      <td>0.014629</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>std</th>\n",
       "      <td>2.177511e+06</td>\n",
       "      <td>3.542068e+05</td>\n",
       "      <td>2.014193</td>\n",
       "      <td>3.981021e+04</td>\n",
       "      <td>5.670385</td>\n",
       "      <td>3.082481e+05</td>\n",
       "      <td>1.165359</td>\n",
       "      <td>5.834213</td>\n",
       "      <td>4725.837644</td>\n",
       "      <td>7512.323050</td>\n",
       "      <td>1.697720</td>\n",
       "      <td>0.168785</td>\n",
       "      <td>0.120062</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>min</th>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>5.000000e+04</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>11.990000</td>\n",
       "      <td>200.000000</td>\n",
       "      <td>1176.410000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25%</th>\n",
       "      <td>1.650000e+04</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>2.000000e+05</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>15.250000</td>\n",
       "      <td>2000.000000</td>\n",
       "      <td>6491.600000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>50%</th>\n",
       "      <td>2.500000e+04</td>\n",
       "      <td>1.000000e+05</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>0.000000e+00</td>\n",
       "      <td>2.000000</td>\n",
       "      <td>3.000000e+05</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>18.000000</td>\n",
       "      <td>4000.000000</td>\n",
       "      <td>9392.970000</td>\n",
       "      <td>3.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>75%</th>\n",
       "      <td>4.000000e+04</td>\n",
       "      <td>3.000000e+05</td>\n",
       "      <td>4.000000</td>\n",
       "      <td>3.500000e+03</td>\n",
       "      <td>11.000000</td>\n",
       "      <td>5.000000e+05</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>20.000000</td>\n",
       "      <td>6250.000000</td>\n",
       "      <td>12919.040000</td>\n",
       "      <td>5.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>max</th>\n",
       "      <td>4.445544e+08</td>\n",
       "      <td>1.000000e+07</td>\n",
       "      <td>10.000000</td>\n",
       "      <td>1.000000e+07</td>\n",
       "      <td>18.000000</td>\n",
       "      <td>3.000000e+06</td>\n",
       "      <td>6.000000</td>\n",
       "      <td>37.000000</td>\n",
       "      <td>50000.000000</td>\n",
       "      <td>144748.280000</td>\n",
       "      <td>7.000000</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       Monthly_Income  Loan_Amount_Applied  Loan_Tenure_Applied  Existing_EMI  \\\n",
       "count    8.702000e+04         8.694900e+04         86949.000000  8.694900e+04   \n",
       "mean     5.884997e+04         2.302507e+05             2.131399  3.696228e+03   \n",
       "std      2.177511e+06         3.542068e+05             2.014193  3.981021e+04   \n",
       "min      0.000000e+00         0.000000e+00             0.000000  0.000000e+00   \n",
       "25%      1.650000e+04         0.000000e+00             0.000000  0.000000e+00   \n",
       "50%      2.500000e+04         1.000000e+05             2.000000  0.000000e+00   \n",
       "75%      4.000000e+04         3.000000e+05             4.000000  3.500000e+03   \n",
       "max      4.445544e+08         1.000000e+07            10.000000  1.000000e+07   \n",
       "\n",
       "               Var5  Loan_Amount_Submitted  Loan_Tenure_Submitted  \\\n",
       "count  87020.000000           5.240700e+04           52407.000000   \n",
       "mean       4.961503           3.950106e+05               3.891369   \n",
       "std        5.670385           3.082481e+05               1.165359   \n",
       "min        0.000000           5.000000e+04               1.000000   \n",
       "25%        0.000000           2.000000e+05               3.000000   \n",
       "50%        2.000000           3.000000e+05               4.000000   \n",
       "75%       11.000000           5.000000e+05               5.000000   \n",
       "max       18.000000           3.000000e+06               6.000000   \n",
       "\n",
       "       Interest_Rate  Processing_Fee  EMI_Loan_Submitted          Var4  \\\n",
       "count   27726.000000    27420.000000        27726.000000  87020.000000   \n",
       "mean       19.197474     5131.150839        10999.528377      2.949805   \n",
       "std         5.834213     4725.837644         7512.323050      1.697720   \n",
       "min        11.990000      200.000000         1176.410000      0.000000   \n",
       "25%        15.250000     2000.000000         6491.600000      1.000000   \n",
       "50%        18.000000     4000.000000         9392.970000      3.000000   \n",
       "75%        20.000000     6250.000000        12919.040000      5.000000   \n",
       "max        37.000000    50000.000000       144748.280000      7.000000   \n",
       "\n",
       "           LoggedIn     Disbursed  \n",
       "count  87020.000000  87020.000000  \n",
       "mean       0.029350      0.014629  \n",
       "std        0.168785      0.120062  \n",
       "min        0.000000      0.000000  \n",
       "25%        0.000000      0.000000  \n",
       "50%        0.000000      0.000000  \n",
       "75%        0.000000      0.000000  \n",
       "max        1.000000      1.000000  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "train.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "    4. 类别型特征的分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cardinality and value frequencies of each categorical feature in the training set.\n",
    "# NaN counts as a distinct value in the cardinality but is omitted by value_counts().\n",
    "cat_features = ['Gender','City','Employer_Name','Salary_Account','Mobile_Verified','Var1','Filled_Form','Device_Type','Var2','Source','Var4']\n",
    "for col in cat_features:\n",
    "    num_values = train[col].nunique(dropna=False)\n",
    "    print('\\n%s属性有%d的不同取值，各取值及其出现的次数\\n' % (col, num_values))\n",
    "    print(train[col].value_counts())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "    5. 标签分布"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZAAAAEICAYAAABxiqLiAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFI5JREFUeJzt3X20XXV95/H3J5BkCE9aEh6kVCrOcsqMqJA+qUAKVArSEpVBbQeKzBBpU4SlVaniE9anIkqUKMRQ1rgKdKgS1OIMIiYBrdoJqG3Dsq1YARVIhELlQRPCt3/sfen1EnL33dxz7z3k/VrrrrvP7/zO3t/DSvJh/357/3aqCkmSJmrWdBcgSRpOBogkqRcDRJLUiwEiSerFAJEk9WKASJJ6MUAkSb0YIJKkXgwQSVIvO053AYM0f/782n///ae7DEkaKjfddNOPqmrBeP2e0gGy//77s27duukuQ5KGSpLbuvRzCEuS1IsBIknqxQCRJPVigEiSejFAJEm9GCCSpF4MEElSLwaIJKkXA0SS1MtT+k70yXDIGz853SVoBrrpvJOnuwRp2nkGIknqxQCRJPVigEiSejFAJEm9GCCSpF4MEElSLwaIJKkXA0SS1IsBIknqZSABkmROkkuSfDXJ6iSHJXlGks8nWZvk8iS7tn1fkeQrSW5IcnrbNjvJirbvdUme07Yf2O5vTZKPJZk9iPolSeMb1BnIfwYeBV4EHA+8FXgfcE5VHQ5cBrwlye7AHwJHAIcDi5L8EnAycEvb9zXAh9v9fhA4uaoWAeuBUwdUvyRpHAMJkKpaD8wBbgPuBG4Fnl5VN7fvXwMcCPwqsKqqflpVBXwcOIYmUFa2fb8P3J1kT+DBqrqjPcwK4KhB1C9JGt+ghrCOB+4BngnsB1wBbBnT7VFgD+DuUW13AXsC86rqgTHtBwAbRhqqajMuBilJ02ZQQ1hHAx+qqker6l6a4am5Wzn2PcCCUW1704TEQ0nmjWm/FZg/0tDOf4wNJZIsSbIuybqNGzdOypeRJD3eoALk74HfBEgyl2aoaackz23bjgVuAb4OHN9Omgc4Hfg8sBo4pe27L7BXVW0Adk6yT3uM04Drxh64qlZU1cKqWrhgwYKxb0uSJsmghoBWAhcmORmYB1wMfAFY0Z5Z/AA4vap+nOQSmsAo4LKq+naS7wLLk6wBNgNntPs9G7gsySyaSfSzBlS/JGkcAwmQdn7itVt569it9L0SuHJM2yaaM4yxff+BZoJdkjTNvJFQktSLASJJ6sUAkST1YoBIknoxQCRJvRggkqReDBBJUi8GiCSpFwNEktSLASJJ6sUAkST1YoBIknoxQCRJvRggkqReDBBJUi8GiCSpFwNEktSLASJJ6sUAkST1YoBIknoxQCRJvRggkqReDBBJUi8GiCSpl3EDJMkOo7b3S2LoSJI6nYH8RZK5SV4FnAP81YBrkiQNgS4BcgCwG7BfVb0WmD/YkiRJw6BLgPw58CHg4iS7A18dbEmSpGGw43gdquqiJFdU1b+1TWcPuCZJ0hDoMon+34EbkuyQ5Kokz5yCuiRJM1yXIaxXAfdU1RbgTOCiwZYkSRoGXQJkl5GNqroDmDu4ciRJw6JLgNw2spHkGUAGV44kaVh0CZCLgd2SvBe4FnjrYEuSJA2DLldh3ZTkcOA5wHur6oHBlyVJmunGDZD2DvTNVfXpJCck2VxVn5mC2iRJM1iXIaylwNXt9tU0V2JJkrZzXQLkkfYSXqrqEZxElyTRLUA2JdkJIMku43WWJG0fugTIe2hW5P0fwF8CfzqRAyT58yRP61OcJGnmGjdAquoG4HXAg8DSqrq+686TLAa+U1X3JTkwyeoka5J8LMnsts8ZSb6c5Ma2P0l2S3JlkrVJPptk77b90Lbv2iTv6vOFJUmTo9PDoarqB1W1qqpuG793o1259xTgA23TB4GTq2oRsB44Ncmz
gUXAocCRwB8k2QN4I3B5VR0OvA04r32w1buB49r2eUmO7lqPJGlydVlM8cT2//ivSrIqyVUd9/0+4FnAF5I8D3iwXQoFYAVwFHAEcGk1NgGXA4cDB1XV1QBV9S2a55EcAKyrqvvafVwIvLRjLZKkSTbufSA0ZwMvrKrNXXeaZD/gGcDC9vcngH8aeb+qNifZEdgD+Maoj94F/CLw6JhdPtT2vXtM3z271iRJmlxdhrDum0h4tH4FuLKqNlXV94AfA3uPvNnOf2wB7gEWjPrc3sCGrexv5659kyxJsi7Juo0bN06wbElSV10C5C+TvCXJXu3k9m4dPnMr8BJ4bC5kV2Bukn3a908DrgNWAye1/eYArwbWAOuTHNu2HwTc3+7zkCS7tvtYClwz9sBVtaKqFlbVwgULFox9W5I0SboMYb24/f3+9ncBp27rA1X1zST/nORvgJ8CbwIeBi5LMotmEv2sdijra0lubD96flXdm+Q84BNJ3kwTHkuqakuSc4FrkhSwtqqunciXlSRNni6LKb6mz46r6j0095CMdsRW+i0Dlo1pux84cSt91wKH9alHkjS5ulyFdWR7lrBDkkuSPH0qCpMkzWxd5kBOAx5q18NaBqwcbEmSpGHQJUB+bmSjqv4O8AxEktQpQO4c2WivwJo9uHIkScOiS4B8Evi5JEuALwLnDbYkSdIw6HIZ75eA36G5q/yVVfUvgy1JkjQMugTIp6vq5cDtgy5GkjQ8ugTIT5JcRHPX+MMAVfXZgVYlSZrxugTIF2nWrfpP7Y8kSZ0C5Lh2CEuSpMc4hCVJ6qVLgIwsWDgyhFWDK0eSNCy6BAgYGpKkMTo9UIpmSfVNwCuBfQdakSRpKHRZzv0zI9vt89A/PdCKJElDocsZyGOqahPNs8klSdu5cc9AkpxJMwcyC3g+8JVBFyVJmvm6TKJ/s/39KPC5qrp1gPVIkoZElyGsHYBdq+pG4MAkLxpwTZKkIdAlQN4OXN9uf6l9LUnaznUJkFTVyB3oDwJzB1uSJGkYdAmQR5LsCJBkTsfPSJKe4rqEwYXARUleDFwCfHywJUmShkGXGwlXJbkdOAxYXlVfG3xZkqSZbtwzkCRzgTur6sPA7SPDWZKk7VuXIawPA89qt58NfGBw5UiShkWXAHl+VX0ZoKpuAH55sCVJkoZBlwB5cMzrLYMoRJI0XLoEyB1JDgZI8ivAnYMtSZI0DLpMiP8xsCzJrjTPBPnDwZYkSRoGXS7jvRc4aQpqkSQNkU53lSc5IckFSU4cdEGSpOHQ5T6QNwAvAD5EsxrvmwdelSRpxutyBnJ8Vb21qm6vqncCxw64JknSEOgSIGMv231kEIVIkoZLlwD5TpJjAJIcDnxvoBVJkoZClwA5Czg2ydXACcCZgy1JkjQMulzG+yBwxhTUIkkaIj4cSpLUiwEiSerlCQMkyZr296opq0aSNDS2NQfyL0k+C/x6kquAtO1VVS/veoAkBwK/Bvw/YCWwM/AD4LVV9eMkrwBeT3O58OVVdVGS2cBy4Dk062/9UVX9Y7uv5W0ttwBnVtXmCXxfSdIkecIAqarXACRZWlXL++w8ydOBvwA+CbwPOKeqbk7yUuAtSd5PszjjETRBcUWStcALgVuqakmSnwdW0NzA+EHg5Kq6I8lS4FTg4j61SZKenHHnQKpqeZJfTnJWkl/ruuMkOwDLgAvapqdX1c3tPq8BDgR+FVhVVT+tqgI+DhxDEygr277fB+5OsifwYFXd0e5vBXBU13okSZOry1pYvwcsBW4G/leS3++473OBjwC3t6/H3tH+KLAHcPeotruAPYF5VfXAmPYDgA0jDe3Q1ePOoJIsSbIuybqNGzd2LFWSNFFdngeyBDiiqrYk+RvgeuB/b+sD7Z3ri4EXAU8DduHxd7DPAu6hec76iL1pQmK/JPOq6qFR7bcC80cdYzZbeTpiVa2gOTth4cKF1eH7SZJ66LQWVlVtAaiqR+iwFlZV/d+q+q9VtYjm
TvYLgR8keS5AkmNpJsG/DhyfZHaSAKcDnwdWA6e0ffcF9qqqDcDOSfZpD3MacF3XLypJmlxdzkA2JDm4nfz+JeBHPY/1VmBFknk0V2Gd3l6FdQlNYBRwWVV9O8l3geXtpcSb+Y874c8GLksyC1hPE06SpGnQJUBeB1yYZA7wUya4FlZVrQHWtC8ftxR8VV0JXDmmbRPNGcbYvv9AM8EuSZpmXdbC2gD4JEJJ0s9wKRNJUi8GiCSply73gayYikIkScOlyyT6DknOprlS6mGAqvq7gVYlSZrxugTI7cBc4Lfa1wUYIJK0nesSIOcCv1lVX0gyv6r63gciSXoK6TKJ/ifAn7aLI65I4gKGkqROAfJi4IF2OZPfBd4+2JIkScOgS4A8tiBhVf2ErSxgKEna/nQJkFtGNpIcCfzr4MqRJA2LLpPo5wMXJLmO5uzj1MGWJEkaBl3WwroLeNUU1CJJGiLjBkj7KNmPAnOAnwBneCmvJKnLENZHgfdX1TeSHNy+fvVgy5IkzXRdJtEXVNU3AKrqZmCvwZYkSRoGXQLksUfYtk8CHPeRtpKkp74nHMJKclC7eU2SZcAnaZ5TvmoK6pIkzXDbmgN52ajte4HjaJ6HvmCgFUmShsITBkhVvWv06yS74AOoJEmtLpfxvg54KfDDtqnwZkJJ2u51uYz3lKo6eOCVSJKGSpchqdvapdwlSXpMlzOQtwF/nWQt8BBAVX1koFVJkma8LgHyfuCvgfWMWtpdkrR96xIge1TV8oFXIkkaKl3mQL6a5JCBVyJJGipdzkCeBVyR5Faa1Xirql4+2LIkSTNdl+eBLJ6KQiRJw6XLjYSXMmbyvKq8kVCStnNdhrDOan/vBPxP4M7BlSNJGhbjTqJX1f3tz11V9R58vK0kiQkujphkR+BpA6pFkjREusyBrKKZAwmwO3D+oIuSJM18Xa7Cetl4fSRJ259tPZHwHWx96ZJ/raqPDq4kSdIw2NYZyNVjXj8LeDdw4uDKkSQNi209kfBbI9tJDgZeBxxTVXdMRWGSpJmtyyT6S4DXA6+oqnsHX5IkaRhsM0CSnAQsBhZX1U+mpiRJ0jDY1iT62cA5wKXAe5M89l5VvX68HSc5HzgEmEMzd3IbsJzmcuBbgDOranOSM4BX0kzYn19VVyfZDVgJ7AXcDyypqruSHAq8D9gCrKmqd0z8K0uSJsO2zkCuaH8mLMli4N6qWpRkHnAdTRCcXFV3JFkKnJrkemARcCgwG/hckhtplk+5vA2T5wHnJTmFJoiOq6r7kpyX5OiqurZPjZKkJ2dbk+i3PYn9/jNwY7ufh5JsAB4ZNQG/Aric5qzj0qoqYFOSy4HDgYOq6m3t57/VnpEcAKyrqvvafVwIvAEwQCRpGkxoKZOuqmp9Vd0DkOR3gL8FNox6fzNNeO0B3D3qo3cBewKPjtnlQ9vo+zOSLEmyLsm6jRs3TsK3kSRtzUACZESS36AZnjofmD+qfTbNPMY9wIJRH9mbUUEzys5d+1bViqpaWFULFyxYMPZtSdIkGViAJDkM+G3gTVW1Cdg5yT7t26fRzIusBk5q+88BXg2sAdYnObZtP4hm/uRW4JAku7b7WApcM6j6JUnb1uV5IBPWXi31OeCbwOr2Cq4zgMuSzALWA2e1V2F9rZ04h+YqrHuTnAd8Ismb+Y+rsLYkORe4JkkBa51Al6TpM5AAqaobaVbuHeuIrfRdBiwb03Y/W1kyparWAodNUpmSpCdhoHMgkqSnLgNEktSLASJJ6sUAkST1YoBIknoxQCRJvRggkqReDBBJUi8GiCSpFwNEktSLASJJ6sUAkST1YoBIknoxQCRJvRggkqReDBBJUi8GiCSpFwNEktSLASJJ6sUAkST1YoBIknoxQCRJvRggkqReDBBJUi8GiCSpFwNEktSLASJJ6sUAkST1YoBIknoxQCRJvRggkqReDBBJUi8GiCSpFwNEktSLASJJ6sUAkST1YoBIknoxQCRJvRggkqRe
DBBJUi9DFyBJ3pvkhiRrk7xwuuuRpO3VjtNdwEQkORKYV1WHJdkdWJXk6KraPN21SVPt9nOfO90laAb6hbf//ZQda9jOQI4EVgJU1f3AWsC/RZI0DYYtQPYA7h71+i5gz2mqRZK2a0M1hAXcAywANrav9wb+/+gOSZYAS9qXDyT5x6kr7ylvPvCj6S5iJsgHf3+6S9DP8s/miHdkMvbyzC6dUlWTcbApkeQo4CVV9aZ2DuQq4LecA5kaSdZV1cLprkMayz+b02OozkCq6otJjkqytm36E8NDkqbHUAUIQFWdPd01SJKGbxJd02vFdBcgPQH/bE6DoZoDkSTNHJ6BSJJ6MUDUiUvIaKZKcnyS7yV5/nTXsr0Zukl0TT2XkNFMVlWfSfKC6a5je+QZiLpwCRlJj2OAqAuXkJH0OAaIuhhZQmbE3sCGaapF0gxhgKiLLwGnALRzIIcBU7dmtKQZyUl0jcslZCRtjTcSSpJ6cQhLktSLASJJ6sUAkST1YoBIknoxQCRJvRgg0jYk2T/J99tFJL/S/j4mjY8nedzfoSRXT2OtF0zHsbV98j4QaXyfqqqzAJI8A7gIeKSq/mB6y5KmlwEiTUBV/TDJq4H/k2RpVS1OcinwbOD6qnonkCTvBl4EzAE+W1V/lmQR8PyquoCm02Lgae2ujwZ2At4AXAE8DLyxqv42yQnACcB8mnXIPlRVNyfZE1gG7AM8Cnx6Cv4TSI9xCEuaoKp6kOYf7BH7AocDH2hf/xfg2qo6AjgU2D3JiePs9sdVtZgmiP4K+A3gW0l+HTiHZv2xHYGfB85tP3MhcH5VLaJZMXnfJ/nVpAkxQKQJSjIb2HlU0xnA24CRBxp9p6q+DFDNUg8XAy+mCZ3Rf+d2GrW9uu1/LXA7cDYwF3ghcHZVLRr5AV7WfmZuVa0bdZxLJus7Sl0YINIEJNkZ+BjNMNOIu6vqXcA729cHtMNVJAnwWuAG4IfA89r2OcBJo/bxcNu+G80ZyBrgLODrwOuTzG3fPwp4d/uZTUkWtu2z2uNIU8Y5EGl8J7RPvJtFM6dxWVWtTHJckl2AlUl+Afhc2//bwGFJ3gHMBlZV1acAkmxM8lXgXuArWznWC2iCaDdgSVXdlORTwOokjwDfBUYm7/8I+Eg7F1LA5cB/m+wvLz0RF1OUJPXiEJYkqRcDRJLUiwEiSerFAJEk9WKASJJ6MUAkSb0YIJKkXgwQSVIv/w52QKpJ1r/yDQAAAABJRU5ErkJggg==\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Target distribution: the two classes are severely imbalanced -- only about 1.5% of\n",
    "# the training samples have Disbursed == 1.\n",
    "# The column is passed by keyword because newer seaborn releases treat the first\n",
    "# positional argument as `data`, not as the x vector.\n",
    "sns.countplot(x='Disbursed', data=train);\n",
    "plt.xlabel('Disbursed');\n",
    "plt.ylabel('Number of occurrences');"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2、特征工程\n",
    "\n",
    "### （1）合并数据\n",
    "\n",
    "    合成一个总的data，方便一起做特征工程\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/apple/anaconda3/lib/python3.7/site-packages/ipykernel_launcher.py:4: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
      "of pandas will change to not sort by default.\n",
      "\n",
      "To accept the future behavior, pass 'sort=False'.\n",
      "\n",
      "To retain the current behavior and silence the warning, pass 'sort=True'.\n",
      "\n",
      "  after removing the cwd from sys.path.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "(124737, 27)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Tag every row with the split it came from before stacking the two frames.\n",
    "train['source'] = 'train'\n",
    "test['source'] = 'test'\n",
    "\n",
    "# train has columns that test lacks, so the column sets are not aligned. State the sort\n",
    "# behaviour explicitly (alphabetical columns) instead of relying on a version-dependent default.\n",
    "data = pd.concat([train, test], ignore_index=True, sort=True)\n",
    "data.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （2）异常值处理\n",
    "\n",
    "    检查数据质量，如异常点、缺省值、空值等等"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "City                      1401\n",
       "DOB                          0\n",
       "Device_Type                  0\n",
       "Disbursed                37717\n",
       "EMI_Loan_Submitted       84901\n",
       "Employer_Name              113\n",
       "Existing_EMI               111\n",
       "Filled_Form                  0\n",
       "Gender                       0\n",
       "ID                           0\n",
       "Interest_Rate            84901\n",
       "Lead_Creation_Date           0\n",
       "Loan_Amount_Applied        111\n",
       "Loan_Amount_Submitted    49535\n",
       "Loan_Tenure_Applied        111\n",
       "Loan_Tenure_Submitted    49535\n",
       "LoggedIn                 37717\n",
       "Mobile_Verified              0\n",
       "Monthly_Income               0\n",
       "Processing_Fee           85346\n",
       "Salary_Account           16801\n",
       "Source                       0\n",
       "Var1                         0\n",
       "Var2                         0\n",
       "Var4                         0\n",
       "Var5                         0\n",
       "source                       0\n",
       "dtype: int64"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.apply(lambda x: sum(x.isnull()))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "    其中，对于Loan Tenure特征，把不合理的贷款年限，设为缺失值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "data['Loan_Tenure_Applied'].replace([10,6,7,8,9],value = np.nan, inplace = True)\n",
    "data['Loan_Tenure_Submitted'].replace(6, np.nan, inplace = True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （3）合并取值多的特征\n",
    "\n",
    "    合并City、Employer_Name、Salary_Account、Source，取前几个重要的保留，其余剩下的合并成一个others特征值。因为LightGBM对类别特征建立直方图时，当特征取值数目超过max_bin(默认255)，会去掉样本数目少的类别，所以我这里合并一下也可以。\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "cat_features = ['City','Employer_Name','Salary_Account', 'Source']\n",
    "rare_thresholds = [100, 30, 40, 40]\n",
    "j=0\n",
    "for col in cat_features:\n",
    "    #每个取值的样本数目\n",
    "    value_counts_col =  data[col].value_counts(dropna=False)\n",
    "\n",
    "    #样本数目小于阈值的取值为稀有取值\n",
    "    rare_threshold = rare_thresholds[j]\n",
    "    value_counts_rare = list(value_counts_col[value_counts_col < rare_threshold ].index)\n",
    "\n",
    "    #稀有值合并为：others\n",
    "    rare_index = data[col].isin(value_counts_rare)\n",
    "    data.loc[ data[col].isin(value_counts_rare), col] = \"Others\"\n",
    "    \n",
    "    j = j+1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （4）日期型数据\n",
    "\n",
    "    把日期转换为年龄\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "#创建一个年龄的字段Age\n",
    "data['Age'] = pd.to_datetime(data['Lead_Creation_Date']).dt.year - pd.to_datetime(data['DOB']).dt.year\n",
    "\n",
    "#把原始的DOB字段去掉:\n",
    "data.drop(['DOB', 'Lead_Creation_Date'],axis=1,inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （5）丢弃不重要特征\n",
    "\n",
    "    把没有实际意义的特征丢掉，如LoggedIn\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "#不能用于预测特征，drop\n",
    "data.drop('LoggedIn',axis=1,inplace=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （6）类别特征编码成数值\n",
    "\n",
    "    不过，LightGBM无需one-hot编码\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import LabelEncoder\n",
    "le = LabelEncoder()\n",
    "feats_to_encode = ['City', 'Employer_Name', 'Salary_Account','Device_Type','Filled_Form','Gender','Mobile_Verified','Source','Var1','Var2','Var4']\n",
    "for col in feats_to_encode:\n",
    "    data[col] = le.fit_transform(data[col].astype(str))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （7）还原数据\n",
    "\n",
    "    数据处理完成后，还原之前合并的train和test数据\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>City</th>\n",
       "      <th>Device_Type</th>\n",
       "      <th>Disbursed</th>\n",
       "      <th>EMI_Loan_Submitted</th>\n",
       "      <th>Employer_Name</th>\n",
       "      <th>Existing_EMI</th>\n",
       "      <th>Filled_Form</th>\n",
       "      <th>Gender</th>\n",
       "      <th>ID</th>\n",
       "      <th>Interest_Rate</th>\n",
       "      <th>...</th>\n",
       "      <th>Monthly_Income</th>\n",
       "      <th>Processing_Fee</th>\n",
       "      <th>Salary_Account</th>\n",
       "      <th>Source</th>\n",
       "      <th>Var1</th>\n",
       "      <th>Var2</th>\n",
       "      <th>Var4</th>\n",
       "      <th>Var5</th>\n",
       "      <th>source</th>\n",
       "      <th>Age</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>15</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>192</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>ID000002C20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>20000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15</td>\n",
       "      <td>1</td>\n",
       "      <td>13</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>train</td>\n",
       "      <td>37</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>44</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>6762.9</td>\n",
       "      <td>227</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>ID000004E40</td>\n",
       "      <td>13.25</td>\n",
       "      <td>...</td>\n",
       "      <td>35000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>17</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>6</td>\n",
       "      <td>3</td>\n",
       "      <td>13</td>\n",
       "      <td>train</td>\n",
       "      <td>30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>52</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>192</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>ID000007H20</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>22500</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>train</td>\n",
       "      <td>34</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>52</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>192</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>ID000008I30</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>35000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>37</td>\n",
       "      <td>9</td>\n",
       "      <td>13</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>10</td>\n",
       "      <td>train</td>\n",
       "      <td>28</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>0.0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>192</td>\n",
       "      <td>25000.0</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>ID000009J40</td>\n",
       "      <td>NaN</td>\n",
       "      <td>...</td>\n",
       "      <td>100000</td>\n",
       "      <td>NaN</td>\n",
       "      <td>15</td>\n",
       "      <td>6</td>\n",
       "      <td>13</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>17</td>\n",
       "      <td>train</td>\n",
       "      <td>31</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 25 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   City  Device_Type  Disbursed  EMI_Loan_Submitted  Employer_Name  \\\n",
       "0    15            1        0.0                 NaN            192   \n",
       "1    44            1        0.0              6762.9            227   \n",
       "2    52            1        0.0                 NaN            192   \n",
       "3    52            1        0.0                 NaN            192   \n",
       "4     6            1        0.0                 NaN            192   \n",
       "\n",
       "   Existing_EMI  Filled_Form  Gender           ID  Interest_Rate  ...  \\\n",
       "0           0.0            0       0  ID000002C20            NaN  ...   \n",
       "1           0.0            0       1  ID000004E40          13.25  ...   \n",
       "2           0.0            0       1  ID000007H20            NaN  ...   \n",
       "3           0.0            0       1  ID000008I30            NaN  ...   \n",
       "4       25000.0            0       1  ID000009J40            NaN  ...   \n",
       "\n",
       "   Monthly_Income  Processing_Fee  Salary_Account  Source  Var1  Var2  Var4  \\\n",
       "0           20000             NaN              15       1    13     6     1   \n",
       "1           35000             NaN              17       1     8     6     3   \n",
       "2           22500             NaN              37       9    13     1     1   \n",
       "3           35000             NaN              37       9    13     1     3   \n",
       "4          100000             NaN              15       6    13     1     3   \n",
       "\n",
       "   Var5  source  Age  \n",
       "0     0   train   37  \n",
       "1    13   train   30  \n",
       "2     0   train   34  \n",
       "3    10   train   28  \n",
       "4    17   train   31  \n",
       "\n",
       "[5 rows x 25 columns]"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/apple/anaconda3/lib/python3.7/site-packages/pandas/core/frame.py:3940: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  errors=errors)\n"
     ]
    }
   ],
   "source": [
    "train = data.loc[data['source']=='train']\n",
    "test = data.loc[data['source']=='test']\n",
    "\n",
    "train.drop('source',axis=1,inplace=True)\n",
    "test.drop(['source','Disbursed'],axis=1,inplace=True)\n",
    "\n",
    "# 保存特征工程的结果\n",
    "train.to_csv('HappyBank/data/FE_train.csv',index=False)\n",
    "test.to_csv('HappyBank/data/FE_test.csv',index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3、LightGBM模型\n",
    "\n",
    "### （0）导入相关包和数据\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/apple/.local/lib/python3.7/site-packages/lightgbm/__init__.py:48: UserWarning: Starting from version 2.2.1, the library file in distribution wheels for macOS is built by the Apple Clang (Xcode_8.3.3) compiler.\n",
      "This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.\n",
      "Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.\n",
      "You can install the OpenMP library by the following command: ``brew install libomp``.\n",
      "  \"You can install the OpenMP library by the following command: ``brew install libomp``.\", UserWarning)\n"
     ]
    }
   ],
   "source": [
    "# 首先 import 必要的模块\n",
    "import lightgbm as lgbm\n",
    "from lightgbm.sklearn import LGBMClassifier\n",
    "\n",
    "from sklearn.model_selection import GridSearchCV"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 读取数据\n",
    "train = pd.read_csv(\"HappyBank/data/FE_train.csv\")\n",
    "\n",
    "y_train = train['Disbursed'] \n",
    "X_train = train.drop([\"ID\", \"Disbursed\"], axis=1)\n",
    "\n",
    "#保存特征名字以备后用（可视化）\n",
    "feat_names = X_train.columns "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （1）调参n_estimators\n",
    "\n",
    "    先预设eta=0.1，粗调基学习器的数目n_estimators。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "MAX_ROUNDS = 10000\n",
    "\n",
    "#categorical_feature = ['City', 'Employer_Name', 'Salary_Account','Device_Type','Filled_Form','Gender','Mobile_Verified','Source','Var1','Var2','Var4']\n",
    "params = {'boosting_type': 'goss',\n",
    "          'objective': 'binary',\n",
    "          'is_unbalance':True,\n",
    "          'categorical_feature': [0,1,3,5,6,12,15,16,17,18,19,20],\n",
    "          #'categorical_feature': names:'City', 'Employer_Name', 'Salary_Account','Device_Type','Filled_Form','Gender','Mobile_Verified','Source','Var1','Var2','Var4',\n",
    "          'n_jobs': 4,\n",
    "          'learning_rate': 0.1,\n",
    "          #'n_estimators':n_estimators_1,\n",
    "          'num_leaves': 60,\n",
    "          'max_depth': 6,\n",
    "          #'subsample': 0.7,\n",
    "          #'bagging_freq': 1,\n",
    "          'colsample_bytree': 0.7,\n",
    "          'verbosity':5\n",
    "         }\n",
    "\n",
    "\n",
    "# 直接调用lightgbm内嵌的交叉验证(cv)，可对连续的n_estimators参数进行快速交叉验证\n",
    "def get_n_estimators(params , X_train , y_train , early_stopping_rounds=10):\n",
    "    lgbm_params = params.copy()\n",
    "#     lgbm_params['num_class'] = 9\n",
    "     \n",
    "    lgbmtrain = lgbm.Dataset(X_train , y_train )\n",
    "    cv_result = lgbm.cv(lgbm_params , lgbmtrain , num_boost_round=MAX_ROUNDS , nfold=5,  metrics='auc' , \n",
    "                                early_stopping_rounds=early_stopping_rounds, seed=3 )\n",
    "     \n",
    "    print('best n_estimators:' , len(cv_result['auc-mean']))\n",
    "    print('best cv score:' , cv_result['auc-mean'][-1])\n",
    "     \n",
    "    return len(cv_result['auc-mean'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/apple/.local/lib/python3.7/site-packages/lightgbm/basic.py:755: UserWarning: categorical_feature keyword has been found in `params` and will be ignored.\n",
      "Please use categorical_feature argument of the Dataset constructor to pass this parameter.\n",
      "  .format(key))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best n_estimators: 42\n",
      "best cv score: 0.825925822500098\n"
     ]
    }
   ],
   "source": [
    "n_estimators_1 = get_n_estimators(params , X_train , y_train)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （2）调参max_depth\n",
    "    \n",
    "    此时，eta=0.1、n_estimators=42。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 24 candidates, totalling 120 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.\n",
      "[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:   10.3s\n",
      "[Parallel(n_jobs=4)]: Done  64 tasks      | elapsed:  1.0min\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8315053967020931\n",
      "{'max_depth': 3}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Done 120 out of 120 | elapsed:  2.1min finished\n"
     ]
    }
   ],
   "source": [
    "# 1、交叉验证5折\n",
    "from sklearn.model_selection import StratifiedKFold\n",
    "kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=10)\n",
    "\n",
    "\n",
    "# 2、设置参数范围\n",
    "params = {'boosting_type': 'goss',\n",
    "          'objective': 'binary',\n",
    "          'is_unbalance':True,\n",
    "          'categorical_feature': [0,1,3,5,6,12,15,16,17,18,19,20],\n",
    "          'n_jobs': 4,\n",
    "          'learning_rate': 0.1,\n",
    "          'n_estimators':n_estimators_1,\n",
    "          'num_leaves': 70,\n",
    "          #'max_depth': 6,\n",
    "          #'subsample': 0.7,\n",
    "          #'bagging_freq': 1,\n",
    "          'colsample_bytree': 0.7,\n",
    "          #'verbosity':5\n",
    "         }\n",
    "\n",
    "max_depth_s = range(3,50,2) \n",
    "tuned_parameters = dict( max_depth = max_depth_s)\n",
    "\n",
    "\n",
    "# 3、交叉验证找最优参数\n",
    "lg = LGBMClassifier(silent=False,  **params)\n",
    "grid_search = GridSearchCV(lg, n_jobs=4, param_grid=tuned_parameters, cv = kfold, scoring=\"roc_auc\", verbose=5, refit = False)\n",
    "grid_search.fit(X_train , y_train)\n",
    "\n",
    "\n",
    "# 4、打印结果\n",
    "print(grid_search.best_score_)\n",
    "print(grid_search.best_params_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**画图**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/apple/anaconda3/lib/python3.7/site-packages/sklearn/utils/deprecation.py:125: FutureWarning: You are accessing a training score ('mean_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n",
      "/Users/apple/anaconda3/lib/python3.7/site-packages/sklearn/utils/deprecation.py:125: FutureWarning: You are accessing a training score ('std_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAEICAYAAACj2qi6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xt8XHWd//HXJ/fm2ubSNgF6SVOwCbSUVmhoWsGyolVQUfmthSIi4Cqi7v5U6Kq/RVF2RUCLbtWKF3YV2V0REO8tvQLlUpCltEDv0FvatKVN0kuay+f3x5yUMW3TSZPJmZm8n49HH2TO98yZT06Hefd8v/P9HnN3REREYpEWdgEiIpI8FBoiIhIzhYaIiMRMoSEiIjFTaIiISMwUGiIiEjOFhoiIxEyhISIiMVNoiIhIzDLCLqCvlZaW+qhRo8IuQ0QkqTz//PO73b3sZPulXGiMGjWKlStXhl2GiEhSMbPXY9lP3VMiIhIzhYaIiMRMoSEiIjFTaIiISMwUGiIiEjOFhoiIxEyhISIiMVNoBLbtO8Sdf3qV7fsOhV2KiEjCUmgEDrS0MW/JBpatbQi7FBGRhKXQCIwdms+wwmyeWL877FJERBKWQiNgZkytKuWpDXvo6PCwyxERSUgKjSh1VaXsPXCENTsawy5FRCQhKTSi1FWVArB8nbqoRESOR6ERZWhhDmcNK+BJjWuIiByXQqOLqVWlPLt5L4db28MuRUQk4Sg0upg2tpQjbR08t3lv2KWIiCQchUYXF1QWk5lu+uqtiMhxKDS6yM3K4LwRQ3hCg+EiIseIW2iY2R1mtszMlprZhV3aZpnZCjNbbma3B9tyzexBM3vKzJ40s7pge7WZLTazJWY2z8wy41Vzp7qqUlZvb2RPc0u8X0pEJKnEJTTMbAaQ6+7TgcuBb3T5sJ8NzASmA2VmNh4YDtzr7hcCNwBXBfveBVzj7hcBq4Hr4lFztLqxka/ePrlhT7xfSkQkqcTrSmMGcB+Au+8HlgLnRLXfDWwBNgOXAVvcfaO7P2VmdwErgZ+aWRZwwN23BM+bD1wSp5qPGn/6YApyMnhSXVQiIn8jXqFRAuyMelwPDIVINxQwB6gCRhG5ojj6/VZ3/wIwAbgTKAN2RbW1Ahlxqvmo9DTjwjElPLF+N+5aUkREpFO8QmMPkQ/8TsN568P/bGCxu9d75BN5PXCLmY0Lrixw93XABqANKO08SNDFdcwECjO70cxWmtnKhoa+WaW2bmwZ2/YdYtPuA31yPBGRVBCv0FgEXAtgZkVExi5WBW0bgPPNrPOK4XLgYLDPNcFzBgMj3X0nkGdm5cG+NwALur6Yu89398nuPrmsrKxr8ymZFiwpotnhIiJviUtXj7svNLNLzGxpsGkOMNnMJrr7PDO7H1hsZhAZ27geOAL82MyuAjqAW4Ln3gr80szSiAyEfz4eNXc1siSX04cMYvm63cyuHdUfLykikvDiNj7g7rceZ/OKoO0h4KHjtH/8OMd5GXhn31Z3cmZGXVUpv39pB23tHWSka0qLiIg+CbtRN7aUppY2/nfr/rBLERFJCAqNblw4phQzjWuIiHRSaHSjOC+LmopCLSkiIhJQaJxEXVUZL7zxJs0tbWGXIiISOoXGSUwbW0pbh/PsJi0pIiKi0DiJSSOHkJ2RplvAioig0DipnMx0zh9drHENEREUGjGpqypl3a5m6vcfDrsUEZFQKTRicHSpdH31VkQGOIVGDMYNL6QkL0u3gBWRAU+hEYO0NOPCqlItlS4iA55CI0bTqkppaGph7c7msEsREQmNQiNGU4NxjeXr+uZ+HSIiyUihEaPTBg+isjRP4xoiMqApNHqgbmwpz2zcS0vbMTcPFBEZEBQaPVBXVcqh1nb++sa+sEsREQmFQqMHpowpIT3NNDtcRAYshUYPFOZkMuH0IpZrXENEBiiFRg/VjS1j1dZ97D/YGnYp
IiL9TqHRQ3VVpXQ4rNioqw0RGXgUGj00ccRg8rLStVS6iAxICo0eykxPY0plieZriMiApNA4BXVjS3l9z0G27D0YdikiIv1KoXEK6qoiS4roakNEBhqFximoGprPsMJszdcQkQFHoXEKzIy6qjKe3LCbjg4tlS4iA4dC4xTVjS1h38FWVm9vDLsUEZF+o9A4RVODcY3l67VUuogMHAqNUzS0IIe3DS/QuIaIDCgKjV6oqypl5eY3OXRES6WLyMCg0OiFqWNLOdLewXOb94ZdiohIv1Bo9MIFo4vJTDee1HwNERkgFBq9kJuVwcQzhrBi456wSxER6RcKjV6aUlnMy9v203hYS6WLSOpTaPTSlDEldDg8t0njGiKS+hQavXTeiCFkZaSxYoO6qEQk9cUtNMzsDjNbZmZLzezCLm2zzGyFmS03s9ujtt9tZkvM7Ckze0/U9iVRf94br5pPRU5mOueNGKxxDREZEDLicVAzmwHkuvt0MysCHjazS929s+N/NjAT2Af8wMzGA5XAXne/yMxygQVm9uegxh3u/tF41NoXplSWMPfxdew/2EpRbmbY5YiIxE28rjRmAPcBuPt+YClwTlT73cAWYDNwWfDzOuCHwXMOAlsBAyYAk4KrjK/Fqd5eqa0swR2e2aSrDRFJbfEKjRJgZ9TjemAoQHAVMQeoAkYBVwHt7r7a3fcE+1wOrHD3duA1oMbdLwLSzOxDcar5lJ07YjDZGWnqohKRlBev0NgDlEU9Hg7sCn4+G1js7vXu7sB64JbOHc3sYmCau38XwN0bo7q1HgQmdn0xM7vRzFaa2cqGhv5fQDA7I51JI4doMFxEUl68QmMRcC1AMKYxHVgVtG0AzjezzvGUy4GDwb7TiXRXfSl4nGZmC80sO9j3SuD5ri/m7vPdfbK7Ty4rK+va3C9qK0t4tb6JNw8cCeX1RUT6Q1xCw90XAh1mthT4LfBVYLKZfTrogrofWGxmy4E64DtmNg14DJgUtC0BRgLfA5aY2TIgy90fjkfNvVU7pgTQuIaIpLa4fHsKwN1vPc7mFUHbQ8BDXdqWA0XHec4m4NG+ra7vjT99MIMy01mxYQ/vPrs87HJEROJCk/v6SFZGGpNHDeHpjZoZLiKpS6HRh6ZUlvDazib2NLeEXYqISFwoNPpQ57iGrjZEJFUpNPrQOacVkZuVztOaryEiKUqh0Ycy09N4+6hiTfITkZSl0OhjtWNKWL+rmV1Nh8MuRUSkzyk0+lhtpcY1RCR1KTT6WE1FIfnZGRrXEJGUpNDoYxnpaZw/upintQ6ViKQghUYc1FaWsHH3AXY2alxDRFKLQiMOphwd19DVhoikFoVGHFRXFFKYk6Gl0kUk5Sg04iA9zTh/dInma4hIylFoxMmUymJe33OQ7fsOhV2KiEifUWjEyVvrUOlqQ0RSh0IjTsYNL2RwbqbGNUQkpSg04iQtzbhgtNahEpHUotCIoymVJWx98xBb9h4MuxQRkT6h0IgjjWuISKpRaMTRmUMLKM7LUheViKQMhUYcdY5rPLNxL+4edjkiIr2m0Iiz2jElbNt3iC17NV9DRJKfQiPOOu+vsWLj7pArERHpPYVGnFUNzac0P0s3ZRKRlKDQiDMz44LKElZs2KNxDRFJegqNflBbWUJ942E279F8DRFJbgqNftA5X0NLiohIslNo9IPK0jzKCrI1yU9Ekl63oWFmQ7s8LotvOanJzKitjNxfQ+MaIpLMThgaZvY+4CtdNv+Tmb0/viWlptoxJTQ0tbCh4UDYpYiInLLurjQ+B8zpsu124LPxKyd11eq+4SKSAroLjTR3/5t/Frv7QcDiW1JqGlmSy/DCHK1DJSJJrbvQyDvBdnXKnwIzo3ZMCc9oXENEklh3ofGKmU2P3mBmU4BN8S0pddVWlrC7+QjrdzWHXYqIyCnpLjTmAHea2UfM7LRgAPwejh3nkBhNOboOlbqoRCQ5nTA03L0euBQ4HfgqUAW8190b+qm2lHNG
8SBOGzxIk/xEJGlldNfo7vuB75zKgc3sDqCOyBjIHHd/KqptFnAz0AYscfevBtvvBiYBWcDt7v5HM6sG/p3IAPwa4HPu3noqNYXNzJhSWcKiV3fS0eGkpek7BSKSXE4YGmb2MMcOehcRCYBnuzuomc0Act19upkVAQ+b2aVRH/azgZnAPuAHZjYeqAT2uvtFZpYLLDCzPwN3Ade4+xYzuwm4DvhRz3/VxFA7poSHXtjK2l1NvG14YdjliIj0yAlDw90/2HWbmQ0HfgNceJLjzgDuC46z38yWAucALwTtdwNbgD1BDXOAdmB58JyDZrYVyAQOuPuW4HnzgQdI4tCYUlkMRNahUmiISLLp0dpTwThHLF1DJcDOqMf1wFCA4CpiDpExklHAVUC7u6929z3BPpcDK4BiYFfU67dyki61RHf6kFzOKB6kSX4ikpR6FBpmdiGRLqWT2QNEr1M1nLc+/M8GFrt7vUcmLKwHbol6jYuBae7+3eA4pVFtmUSuSLrWdaOZrTSzlQ0NiT9OP2nEEF7auj/sMkREeqy7taceNrPfBH8eNrPfAFcDn4nhuIuAa4PjFAHTgVVB2wbgfDPrvGK4HDgY7DsduAz4EoC7HwHyzKw82PcGYEHXF3P3+e4+2d0nl5Ul/pqKNRVF7Nh/mL0HjoRdiohIj8Q8pmFmg4AbgZ8Bl3R3UHdfaGaXBGMZEOmOmmxmE919npndDyw2M4iMbVxvZtOAx4AXo9o+DtwK/NLM0oDVwOd7/msmluqKyFjG6u37mTY28UNORKTTSccHzKyAyNdjrwfuIDJ346Tc/dbjbF4RtD0EPNSlbTmRb2cdzztjec1kUXM0NBoVGiKSVLrrnio1s28CjwCbgfXufp+7HzOmID0zODeL0wYPYvX2xrBLERHpke4Gwm8DPgh81d0fIDIRT/pIdUUhq7drMFxEkkt3y4h8Bng3cLWZLSL4yqz0jZqKQjbtPsCBFmWxiCSPbr9y6+5vuPunicylWGZmTwR39JNeqqkowh1erVcXlYgkj5jmabj7Dnf/JyLdVbXxLWlg6BwMX6NxDRFJIj2aXR2scPvlONUyoJQX5TAkN1OD4SKSVHo0I1z6jplRU1Gk0BCRpKLQCFF1RSGv1TfR2t4RdikiIjFRaISopqKQI+0duv2riCQNhUaIomeGi4gkA4VGiEaX5jMoM12T/EQkaSg0QpSeZrytvEBXGiKSNBQaIaupKOSV7Y10dHS9s66ISOJRaISspqKIppY2tr55KOxSREROSqERspqoe2uIiCQ6hUbIzhxWQHqaaVxDRJKCQiNkOZnpVJXl60pDRJKCQiMB1FQU6kpDRJKCQiMBVFcUsquphYamlrBLERHplkIjAdRURG6Nri4qEUl0Co0EUK3lREQkSSg0EkDRoEzOKB6kGzKJSMJTaCSImvIi1uxQaIhIYlNoJIiaikI27T5Ac0tb2KWIiJyQQiNBdI5rvKKrDRFJYAqNBHH0G1Tb9A0qEUlcCo0EMawwm5K8LH2DSkQSmkIjQZgZ1ZoZLiIJTqGRQGoqili3q4kjbR1hlyIiclwKjQRSU1FIa7uzdmdT2KWIiByXQiOBdN5bQ/M1RCRRKTQSyKiSPPKy0jUzXEQSlkIjgaSlGePKC7VwoYgkLIVGgqmuKGTN9kY6OjzsUkREjqHQSDA1FYUcONLO63sPhl2KiMgxFBoJRvfWEJFEptBIMGOH5ZORZprkJyIJKW6hYWZ3mNkyM1tqZhd2aZtlZivMbLmZ3R61/f1mttnMzu2y/5KoP++NV82JIDsjnbHDChQaIpKQMuJxUDObAeS6+3QzKwIeNrNL3b012GU2MBPYB/zAzMa7+0vu/qiZTexyrCxgh7t/NB61JqKaikKWvLYLd8fMwi5HROSoeF1pzADuA3D3/cBS4Jyo9ruBLcBm4LLg5xOZAEwKrjK+FpdqE0xNRSG7m4/Q0NQSdikiIn8jXqFRAuyMelwPDAUws1xgDlAFjAKu
Atq7OdZrQI27XwSkmdmHuu5gZjea2UozW9nQ0NAnv0CY3hoMVxeViCSWeIXGHqAs6vFwYFfw89nAYnevd3cH1gO3nOhA7t4Y1a31IDDxOPvMd/fJ7j65rKysa3PSGVdeAOgbVCKSeOIVGouAawGCMY3pwKqgbQNwvpl1jqdcDhx3UoKZpZnZQjPLDjZdCTwfp5oTRkFOJiNLcnWlISIJJy4D4e6+0MwuMbOlwaY5wGQzm+ju88zsfmBxMMi7Bbj+BMfpMLPvAUvMrBV40t0fjkfNiaamopCXtyk0RCSxxCU0ANz91uNsXhG0PQQ8dILn3dbl8aPAo31dX6KrqSjiD6vqaTzcSmFOZtjliIgAmtyXsKo7l0lXF5WIJBCFRoLqvLeGxjVEJJEoNBLU0IIcygqy9Q0qEUkoCo0EVhMsky4ikigUGgmspqKQ9buaaWnrbu6jiEj/UWgksJqKIto6nLX1zWGXIiICKDQSWnV552C4xjVEJDEoNBLYiOJc8rMz9A2qXjjQ0sb8ZRt4511LmLdkvW6jK9JLcZvcJ72XlmZUlxfqSuMUNB1u5T9WvM59yzfy5sFWKkvzuPNPr/H0xr3cc+UESvOzT34QETmGQiPBVVcU8l/PbaG9w0lP0701Tmb/wVZ++uQmfvbkJhoPt3HxWWXcPGMsE88YzK+e3cJtj61m5tzl3PvRiUypLAm7XJGko9BIcDUVhRxqbWfT7gNUDc0Pu5yEtffAEX7yxEbuf+p1mlvaeFf1MG5+51jOOb3o6D6zLhjBxBGDuemBF5j146f5/CVnctPFVQpjkR5QaCS4t+6tsV+hcRwNTS38ePlGfvH06xxqbWfm2eV85p1VjAu+RNDVuPJCHvtMHV955GXuWbCWZzbt4Tv/51yGFuT0c+UiyUmhkeDGDssnKz2NNTsaef+5p4VdTsKo33+YHy3bwAPPvEFreweXT6jgpourGDus4KTPzcvO4J4rJ1BbWcL/++3LzJz7BHP//lymVpX2Q+UiyU2hkeAy09M4c3i+ZoYHdje38N2Fa/nv57bS7s4HJ57GTRdXMbo0r0fHMTOufPsZnDtiMDf98gWu/skz3HxxFZ+75My4d1e1tnfQfLiNpsNtNLW00nS4jUNHNIFTeq+sIJuzTys6+Y69oNBIAjXlRSx4ZSfuTnAPkgHH3Xn4r9v4+u/WcKCljQ9POoNPXzSGM4pze3XcM4cV8OhnpvIvj67m3kXreWbTXu796ESGFfasu2rfwSOs2dHImu2NbH3zEE2H22huaaW5JRIOzYfbaAy2HW7t6FXNIifyvvHlfH/WeXF9DYVGEqiuKOS/Vm6hvvEw5UWDwi6n323bd4h//s0qlq5t4LwRg7nzw+OpGnrybqhY5WZl8O2PTKB2TAlffvhl3jN3OfdcOYGLzhp6zL7uztY3D7F6e+PRkHhlRyPb9h06uk9+dgaFORkU5GSSn5NBcV4WI4pzKcjJpCAng4LsDPI724N9B2WlD9h/EEjfGTwo/vfeUWgkgaPLpG9rHFCh0dHh/OKZ1/nWH1/Fgdsuq2Z27ai4dR9dcd7pjD99MJ954AWu/dlzfOqiMcw8u5xXdvxtQDS1tAGQZlBZls+kkUOYXTuS6vJCxpUXUlagOSCSuhQaSWBceSFmkXtrXFI9LOxy+sWGhmZufeglntv8JtPGlnLHB8/pdVdULKqG5vPITVP52mOr+cGSDfxgyQYAcrPSGVdeyAcmnkZ1RSHV5YWcOayAQVnpca9JJJEoNJJAXnYGo0vyBsTM8Nb2DuYv28jcx9cxKDOduz4ygQ+dd1q/dt3kZKbzr1eM533jK3jz4BFqKooYWZxLmuZziCg0kkV1RSF/fWNf2GXE1cvb9vOlX7/Emh2NzDxnOLddXhPq/Al9BVfkWAqNJHH+6GJ+99IOVm7ey+RRxWGX06cOt7Yz9/F1zF+2keK8LH549Xm8++zysMsSkePQKrdJ4sOTTqckL4u5j68Lu5Q+9eymvcyc
u5wfLNnAh847jYX/+A4FhkgCU2gkidysDG6cXsnydbt5/vU3wy6n1xoPt/KVR1Zx5Y9WcKS9g1984gLu/PAEinLj/5VBETl1Co0kcvWUkRTnZXFvkl9t/GV1Pe+6ZxkPPPMG100dzV/+cTp1YzV+IJIMNKaRRPKyM7h+2mju/NNrvLhlH+eeMTjsknpkV+NhbntsNX9YVc/bhhfww9mTku53EBnodKWRZK6pHcXg3EzmLlwbdikxc3cefPYNZtyzlIWv7OKLl57FYzfXKTBEkpCuNJJMfnYGN0yr5Nt/fo3/3bKPCQn+wbuxoZk5v1nFM5v2csHoYv71inOoLNMS7yLJSlcaSeia2pEUDcrke4sSd2yjtb2Df1+8nnfPXc6aHY382xXn8KsbpigwRJKcrjSSUEFOJtfXjebuBWt5edv+uC+F3FMvbtnHrQ+9xKv1TZFJepfVMLSHq8aKSGLSlUaS+tjUURTmZCTUvI0DLW18/bE1XDHvSfYdbGX+7EnMu2qSAkMkhehKI0kV5mRyXd1ovrtwHau37z96W9h4amvv4EBL+9EbBzW3RO4T0dTSxt7mFn68fBPb9h1i9pSRfOndZ1GQozkXIqlGoZHEPj51ND95YhP3Pr6OH82e3CfHPNzazjd//wobGpr/JhSaD7dxqLX7u8tVDc3nf/6hlren2DInIvIWhUYSKxqUycenjubex9fxyo5GxpUX9up47R3OZ3/1Vxa8spNJI4ZQkpfFyJI88rMzKMjJID87+BN1I6FIW+RmQmUF2XG/VaqIhEuhkeQ+MXU0P31iE99btI55V0065eO4O7f9djV/WbOT2y6r5tqpo/uwShFJFRoIT3JFuZlce+Eo/rCqntfqm075OPOWbOA/n36dT76jUoEhIiek0EgBn6gbTV5WOvee4ryNXz+/lW//+TU+cG4Ft1z6tj6uTkRSSdxCw8zuMLNlZrbUzC7s0jbLzFaY2XIzuz1q+/vNbLOZnRu1rdrMFpvZEjObZ2b6Sk4XQ/Ky+NiFo/jDqh2s29mzq42laxu49aGXmFpVwp0fnqC704lIt+ISGmY2A8h19+nA5cA3unzYzwZmAtOBMjMbD+DujwI/73K4u4Br3P0iYDVwXTxqTnbXT6tkUGY69y5aH/NzVm3dz6d+8TxjhxXww6snkZWhC08R6V68PiVmAPcBuPt+YClwTlT73cAWYDNwWfDzMcwsCzjg7p3t84FL4lNycivOy+Ka2lH87qXtrN/VfNL939hzkI///FmG5Gbx84+/XXMqRCQm8QqNEmBn1ON6YCiAmeUCc4AqYBRwFXCiCQAlwK7OB+7eir7xdUI3TBtNTkY63z/J2MbeA0f42M+epa3Duf+68xmmGdsiEqN4hcYeoCzq8XDe+vA/G1js7vXu7sB64JZujnP07jxBF9cxAWNmN5rZSjNb2dDQ0Bf1J6WS/Gxm147kt/+7nY0Nx7/aOHSknet+/hzb9x3ivmsmUzVUCwiKSOziFRqLgGsBzKyIyNjFqqBtA3C+mXVeMVwOHDzeQdz9CJBnZp03jb4BWHCc/ea7+2R3n1xWVta1eUC5YVolWRlpfP84Yxtt7R3c/KsXeGnrPu796EQma+a2iPRQXELD3RcCHWa2FPgt8FVgspl92t33APcDi81sOVAHfKebw90K/NLMlgA1wE/jUXOqKCvI5uoLRvLIi9vYtPvA0e3uzlcffZmFr+zia+8/m0trhodYpYgkK4v0EKWOyZMn+8qVK8MuI1S7mg4z7VuLuWxCBXd9ZAIA9z6+jnsWrOWmi8fwRc3FEJEuzOx5dz/pInb6jmUKGlqQw6wLRvDwX7fx+p4D/PdzW7hnwVquOO80vvCus8IuT0SSmEIjRf3DO8aQnmZ87sEXmfPwKqafWca3PjQeM03eE5FTp9BIUcMKc5h1/ghe3LKPceUFzLvqPDLT9dctIr2jOQ8p7LMzxpKdkcYnpo0mP1t/1SLSe/okSWHFeVnMmTku7DJE
JIWov0JERGKm0BARkZgpNEREJGYKDRERiZlCQ0REYqbQEBGRmCk0REQkZgoNERGJWcqtcmtmDcDrYdcRglJgd9hFJACdhwidhwidh7ec7FyMdPeT3pAo5UJjoDKzlbEsa5zqdB4idB4idB7e0lfnQt1TIiISM4WGiIjETKGROuaHXUCC0HmI0HmI0Hl4S5+cC41piIhIzHSlISIiMVNoJCkze7+ZbTazc4PH1Wa22MyWmNk8M8sMu8b+YGZ3B7/zU2b2noF4Hsws18weDM7Bk2ZWNxDPQ7Tg97/OzCrM7A9mttTMHjCzgrBr6w9m9l4zWx38/f9PX74f1D2VxMzsNuARd3/RzP4AfNLdt5jZTUCbu/8o3Arjy8w+ANS4+zfNLBdYAOxn4J2HSmC4uz9lZtXAzcBIBth56GRmQ4DHgf8AJgJz3f0FM3svUOfuc0ItsB8Enw2/cfeXgsd99vmgK40UYGZZwAF33xJsmg9cEmJJ/WUd8EMAdz8I7GIAngd33xgExl3ASuDnDMDzAGBm6cBc4LvBpiHu/gKAu/8eqA6rtn42GbjLzP5oZhPow/eDbveaGkqIfGAC4O6tZpbyf7fuvrrzZzO7HHgWOD2qfUCch07u/gUz+xGRD4VXo7YPpPPwdeBeIB8oBtq7tHf0e0XhuNbdd5vZ6cDPgLWdDb19PwyUN1Kq20NkiQAAgv7Krv+zpCwzuxiYBnwZ+M+o7QPiPJjZOGCDux9x93Vmtgk4M6p9oJyH9wAfAKYCg4kEx+Yuuw2I3hV33x38d6uZNQHDO9t6+34YECcw1bn7ESDPzMqDTTcQ6d9PeWY2HbgM+NIAPg/TgWsAzGwwcAZwaKCdB3f/o7vXuPtFwOeB7wPbzOwcADObCawJscR+YWafNrOrg5+HEwnP7L56P2ggPIl1GQg/m8hleRqwGvi8u7eGWV+8mdk04HfAi0DnG/lmIn3aA+k8ZAA/BkYR6X65BTjMAHs/RDOzi4BzgV8T6a7LBbYB/+DuTSGWFndmNojIFXcpkSuKzxP5/6NP3g8KDRERiZm6p0REJGYKDRERiZlCQ0REYqbQEBGRmCk0REQkZgoNkZCY2c+DeRWn+vxrg/W3en0skVgpNEREJGZaRkQGvGAi2BWcYeJ2AAACLklEQVRElt4YTmStnkZgFpH1i64G2oB7gAKgBVgKHAQWAf/X3T9mZjcCWe7+/RO8zizgU0Qm4K0nMuGss+2TwN8BRcAbwDfcfZOZPQqsIrI0RibwZ3e/3czqgFuBNjPrnKT1YTP7KDAEuNPdH+yD0yPyNxQaIhHnAxcTmUm9FHjA3f/OzM4iMst8HHCDu280s2wiM26fcveXzGxFsMJsubtfdbyDm9kFwKXAO4MF4yqBJ4K2vweuAw4Fu1cB/0xkuYezgG+7+1eCfW8xs+vd/T4z+zdgn7v/3sw+QmR59BlBfQsAhYb0OYWGSMSj7n4IIFjw77fB9rVADbDJ3TcCuHuLmf0CqAz2uZ/IlcOsbo4/FfhJ59INQfg8HtX2MXc/ujJt1E1y1rr7E1HH+SHwPeC+47zGT6Lq2xvD7yzSYxrTEIk4FPWzE+l6wiPr7OwHRpjZKIDgX/Kzo/a/k8jqqp8zs/wTHP8p4BOdYWBmY4EZQdvTwK1mlha0XQfcGLSdGSzK2OlGYFkMv4NIXOhKQyQ2nwK+F9wu9CCwGMDMrgQ2uvtzZvZ1IoslfqLrk9396SAoFplZB5H7XSwL2n5pZmOA5WYGkRD5YvDUV4FpZvYvQDawwN2/HbRtInKjnefi8huLHIcWLBRJYGb2iLt/IOw6RDrpSkOkj5nZkq7bgns8iCQ9XWmIiEjMNBAuIiIxU2iIiEjMFBoiIhIzhYaIiMRMoSEiIjFTaIiISMz+P5+gxFHgQ7pxAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# plot CV误差曲线\n",
    "test_means = grid_search.cv_results_[ 'mean_test_score' ]\n",
    "test_stds = grid_search.cv_results_[ 'std_test_score' ]\n",
    "train_means = grid_search.cv_results_[ 'mean_train_score' ]\n",
    "train_stds = grid_search.cv_results_[ 'std_train_score' ]\n",
    "\n",
    "x_axis = max_depth_s\n",
    "\n",
    "plt.plot(x_axis, test_means)\n",
    "plt.xlabel( 'max_depth' )\n",
    "plt.ylabel( 'AUC' )\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "    性能都差不多，还是选择默认值吧！"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （3）调参num_leaves\n",
    "\n",
    "    num_leaves一般建议为70-80，其值越大模型越复杂，越容易过拟合。\n",
    "    \n",
    "    此时，eta=0.1、n_estimators=42、max_depth=6。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 4 candidates, totalling 20 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.\n",
      "[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:   10.2s\n",
      "[Parallel(n_jobs=4)]: Done  18 out of  20 | elapsed:   16.8s remaining:    1.9s\n",
      "[Parallel(n_jobs=4)]: Done  20 out of  20 | elapsed:   16.8s finished\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "GridSearchCV(cv=StratifiedKFold(n_splits=5, random_state=10, shuffle=True),\n",
       "       error_score='raise-deprecating',\n",
       "       estimator=LGBMClassifier(boosting_type='goss',\n",
       "        categorical_feature=[0, 1, 3, 5, 6, 12, 15, 16, 17, 18, 19, 20],\n",
       "        class_weight=None, colsample_bytree=0.7, importance_type='split',\n",
       "        is_unbalance=True, learning_rate=0.1, max_depth=6,\n",
       "        min_child_samples=20, min_child_weight=....0, reg_lambda=0.0, silent=False,\n",
       "        subsample=1.0, subsample_for_bin=200000, subsample_freq=0),\n",
       "       fit_params=None, iid='warn', n_jobs=4,\n",
       "       param_grid={'num_leaves': range(50, 90, 10)},\n",
       "       pre_dispatch='2*n_jobs', refit=False, return_train_score='warn',\n",
       "       scoring='roc_auc', verbose=5)"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 1、参数设置范围\n",
    "params = {'boosting_type': 'goss',\n",
    "          'objective': 'binary',\n",
    "          'is_unbalance':True,\n",
    "          'categorical_feature': [0,1,3,5,6,12,15,16,17,18,19,20],\n",
    "          'n_jobs': 4,\n",
    "          'learning_rate': 0.1,\n",
    "          'n_estimators':n_estimators_1,\n",
    "          #'num_leaves': 60,\n",
    "          'max_depth': 6,\n",
    "          #'subsample': 0.7,\n",
    "          #'bagging_freq': 1,\n",
    "          'colsample_bytree': 0.7,\n",
    "          #'verbosity':5\n",
    "         }\n",
    "\n",
    "num_leaves_s = range(50,90,10)  # 50,60,70,80\n",
    "tuned_parameters = dict( num_leaves = num_leaves_s)\n",
    "\n",
    "\n",
    "# 2、交叉验证找最优参数\n",
    "lg = LGBMClassifier(silent=False,  **params)\n",
    "\n",
    "grid_search = GridSearchCV(lg, n_jobs=4, param_grid=tuned_parameters, cv = kfold, scoring=\"roc_auc\", verbose=5, refit = False)\n",
    "grid_search.fit(X_train , y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8241159818807958\n",
      "{'num_leaves': 70}\n"
     ]
    }
   ],
   "source": [
    "print(grid_search.best_score_)\n",
    "print(grid_search.best_params_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**画图**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/apple/anaconda3/lib/python3.7/site-packages/sklearn/utils/deprecation.py:125: FutureWarning: You are accessing a training score ('mean_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n",
      "/Users/apple/anaconda3/lib/python3.7/site-packages/sklearn/utils/deprecation.py:125: FutureWarning: You are accessing a training score ('std_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAEICAYAAACavRnhAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8VOXZ//HPBQIaUUQWQdnFHVBkSEAFUajWDbdWZd+xttaqtVqqfXwqfWx/tlhr6wYBRNmsVtpqa1UqIAokBEQ2kUWQRQJhX8KS5fr9MSd2mobNZDiZme/79eLVOefcc3LdTtLvnHPP3Le5OyIiIuVRJewCREQk8SlMRESk3BQmIiJSbgoTEREpN4WJiIiUm8JERETKTWEiIiLlpjAREZFyU5iIiEi5nRB2AcdL3bp1vVmzZmGXISKSUObNm7fF3esdqV3KhEmzZs3IyckJuwwRkYRiZl8eTTvd5hIRkXJTmIiISLkpTEREpNwUJiIiUm4KExERKTeFiYiIlJvCREREyi1lvmciIvE178vtzPh8c9hlSBluvbQRzeueHNefoTARkXJ7Z9FG7pv8CQVFjlnY1UhplzatrTARkcrt9Zx1PPLnhbRtUpsx/dtT66RqYZckIYjbmImZPWlmH5rZDDO7rNSxnmY228xmmtnwmP0jzGy6mc0ys+vKOOdvYx5nmNkyM7slXn0QkcMb+/FqfvLGQi5vWZdXB6UrSFJYXMLEzLoCae7eGegO/NLMYn/L+gDXA52BembWJgiFbe7eBegGPGZmVWLOeR9wZ8m2u2cBv45H/SJyeO7Os/9awS/eWsq1F51BZr8IadV1oyOVxevV7wpkArj7TjObAbQG5gfHRwDrgK1BDcOAImBm8Jx8M1sPGHwdTnWAeXGqV0SOkrvz5D8+Y9TM1dx26Vk8dXsbTqiqD4amuniFSR1gU8x2LlAfwMzSiIZHy6DNlUCRuy8paWxm3YHZ7l5kZi2A24B7gSnHUoSZDQWGAjRp0uQbd0ZEooqKnUenLGLy3HX069iUx2+6iCpVNOIu8Rsz2QrEzn/fACj5zGArYJq757q7AyuBR0oamtlVQCd3fya4NfYscAkwDbjCzDKPtgh3H+nuEXeP1Kt3xOn4ReQwDhYW86PJnzB57jruvaol/9tdQSL/Fq8w+QDoD2BmtYiOjSwKjq0C0s2s5KqoO5AftO0M3AQ8DODuBe5+o7tfHoylfOTug+NUs4gcwr6DRdz9ag5vL9zIsOvO56Frz8P0GWCJEZfbXO4+1cy6BWMlEL2tFTGztu7+vJmNA6YFv4zrgMFm1gl4C1gQc2yAu6+OR40icnR27y9g0Lgc5q7ZxpO3tqZnhm4Zy3+z6J2m5BeJRFwrLYocm217D9J/bDZLv9rFiDsu5uZLzgq7JDnOzGyeu0eO1E6f5RORMm3atZ/emVl8uS2fl/q0o+sFZ4RdklRiChMR+S9rt+bTa/Qctu05yLgB6XQ8u07YJUklpzARkf+wfNNuemdmcbComAlDOnBJ49PCLkkSgMJERL62cP0O+o3J5oSqVXhtaEfOa3BK2CVJglCYiAgAWV9sZdC4HE5Lq8aEwRk0rRPfWWYluWgOBBFh2rLN9B2TzRmn1uD173VUkMgx05WJSIp769OveOC1BZzf8BTGDUinTs0aYZckCUhhIpLCJmevZdiURUSa1mZ0//aceqKmkJdvRmEikqIyZ37BL//+GVeeW48Xe7fjpOpVwy5JEpjCRCTFuDu/m7qCZ/+1gutbN+CZO9tS/QQNn0r5KExEUkhxsfPE20t5edYa7og04le3taGqZv6VCqAwEUkRhUXF/PTNRbwxbz0DL2/OYzdcoCnkpcIoTERSwIHCIu6fvIB3Fufyo67ncH+3czSFvFQohYlIkss/WMj3xs/nw+V5PHbDBQzu1CLskiQJKUxEktiu/QUMHDuX+Wu389Ttbbij
feOwS5IkpTARSVJb9xyg75hslm/azR96XMoNbRqGXZIkMYWJSBLauHMfvTOzWL99HyP7RrjqvPphlyRJTmEikmTWbNlLr8wsdu4r4JWB6WS00FokEn8KE5Eksix3F31GZ1NYVMykIR1o3ahW2CVJilCYiCSJBeuia5GcWK0Kf7q7I+ecobVI5PhRmIgkgVmrtjBkXA51atZgwuAMGp+eFnZJkmIUJiIJburSTXx/4nya1Unj1UEZnHHqiWGXJClIYSKSwP66YAMP/ulTLjrzVMYNSKf2ydXDLklSlMJEJEFNyPqSx/6ymPRmp5PZL8IpWotEQhS3eafN7Ekz+9DMZpjZZaWO9TSz2WY208yGx+wfYWbTzWyWmV1Xxjl/G/P4h2b2UXCOW+LVD5HK6MUZq3h0ymKuOq8+4wamK0gkdHG5MjGzrkCau3c2s1rAFDO71t0LgiZ9gOuBHcALZtYGaAFsc/cuZpYGvG9m77p7cXDO+4A7gYfMrCXQBegEVAPeMrOZ7r41Hv0RqSzcnd++9znPTVvFTRefydN3XEy1qlqLRMIXr9/CrkAmgLvvBGYArWOOjwDWAWuAm4LHK4AXg+fkA+sBg6/DqQ4wL3j+1cBYjzoITASujFNfRCqF4mLn8b8t4blpq+iR3phn7rxEQSKVRrzGTOoAm2K2c4H6AMFVxzCgZdDmSqDI3ZeUNDaz7sBsdy8ysxbAbcC9wJSY839S6vzN49MVkfAVFhXz8BsLefOTDQzt3IJh152vKeSlUonX25qtQL2Y7QbA5uBxK2Cau+e6uwMrgUdKGprZVUAnd3/GzKoBzwKXANOAK8ws8wjnJ+ZcQ80sx8xy8vLyKq53IsfRgcIivj9hPm9+soGHrjlXQSKVUrzC5AOgP0AwZtIZWBQcWwWkm1nJVVF3ID9o25noba+HAdy9wN1vdPfL3b0L8JG7DyYaLH2C51QHegDTSxfh7iPdPeLukXr16pU+LFLp7T1QyKCXc3hv6Sb+96YLufdqLWollVNcbnO5+1Qz62ZmM4Jdw4CImbV19+fNbBwwLfijWAcMNrNOwFvAgphjA9x9dRnnX2Fmc8xsZrBrhLtvi0dfRMKyM7+AAS9ns2DdDn773Yv5TrtGYZckckgWvdOU/CKRiOfk5IRdhshRydsdXYtk5ebd/KFHW77dSmuRSDjMbJ67R47UTl9aFKlkNuzYR5/MLDbu3M/ofu3pfK5u0UrlpzARqUS+yNtD78wsdh8o5NVB6USanR52SSJHRWEiUkks/WoXfcdk4Q6ThnSg1Vlai0QSh8JEpBKY9+V2BozN5uQaJ/DqoAxa1q8Zdkkix0RhIhKyj1ZsYcgrOZxxag3GD86gUW2tRSKJR2EiEqJ3l+Tyw4mf0KLeybwyKJ36p2gtEklMChORkLw5fz0/eWMhrc+qxcsD2nNamtYikcSlMBEJwauz1/Dzvy7hsrPrMKpvhJNr6E9REpt+g0WOI3fn+emr+M27n9PtgjP4Y8+2nFitathliZSbwkTkOHF3fv3PZbw04wtuueRMfvNdrUUiyUNhInIcFBU7P//rYiZmraVXRhOG39yKKlU0YaMkD4WJSJwVFBXz0Ouf8tcFX3FPl7N5+NrzNPOvJB2FiUgc7S8o4t6J85n62WYe/vZ5fL9Ly7BLEokLhYlInOw5UMiQcTnMWb2V4be0ok+HpmGXJBI3ChORONiRf5B+Y+eyeMNOnr7jYm5tq7VIJLkpTEQq2OZd++kzOpvVW/byQq9LueaiBmGXJBJ3ChORCrRuWz69R2eRt/sAYwe05/KWdcMuSeS4UJiIVJCVm6NrkeQfLGT84AwubVI77JJEjhuFiUgFWLxhJ33HZFPFjNfu7sgFDU8NuySR40phIlJOc9dsY+DYuZx6UjXGD86ged2Twy5J5LhTmIiUw4zledz9ag5n1jqJ8YMzOPO0k8IuSSQUChORb+idRRu5b/InnFP/FF4ZlE7dmjXCLkkkNAoTkW/g9Zx1
PPLnhbRtUpsx/dtT66RqYZckEiqFicgxGvvxan7x1lI6nVOXl/q0I626/oxE9FcgcpTcnT98sJKn31/OtRedwbM92lLjBK1FIgIQt8UUzOxJM/vQzGaY2WWljvU0s9lmNtPMhsfsH2Fm081slpldF+zrbmZzgvYvmVmVYP+vgnNPN7ML4tUPEYgGyf/9/TOefn85t116Fs/1vFRBIhIjLlcmZtYVSHP3zmZWC5hiZte6e0HQpA9wPbADeMHM2gAtgG3u3sXM0oD3zexdoDrQBTgATALqm9klwEF3v9LMmgJ/BG6KR19EioqdR6csYvLcdfS/rBn/c+OFWotEpJR4XZl0BTIB3H0nMANoHXN8BLAOWEM0BNYBK4AXg+fkA+sBc/c3gIbABqCGu+cG53o3aPslUIBIHBwsLOZHkz9h8tx1/PDqljx+k4JEpCzxCpM6wKaY7VygPkBw1TEMaAk0A3oBRe6+xN23Bm26A7PdvQjA3VcDTYBlZtYbWAD0tqh2wO6yijCzoWaWY2Y5eXl5ceimJLN9B4u4+9Uc3l64kZ9dfz4/vkaLWokcSrzCZCtQL2a7AbA5eNwKmObuue7uwErgkZKGZnYV0MndnzGzGmZ2PoC7FwLPAR3d/X2iATUdeA/4TVlFuPtId4+4e6RevXplNREp0+79BfQbm8305Xk8eWtrhnY+O+ySRCq1eIXJB0B/gGDMpDOwKDi2Ckg3s5Lxmu5AftC2M9HbXg8HxwqIjqmkBdu3AtkA7v4E8Cgwyt0Xx6kfkoK27T1Ir8ws5n+5nd/f1ZaeGU3CLkmk0ovLALy7TzWzbmY2I9g1DIiYWVt3f97MxgHTglsG64DBZtYJeIvoLaySYwOAJ4B3zawQ+BR4EL6+XfYYcFs8+iCpadOu/fTOzGLttnxG9m3H1eefEXZJIgnBoneaEo+Z3Qcscfd/HU37SCTiOTk5ca5KEtnarfn0Gj2HbXsOktmvPR3PrhN2SSKhM7N57h45UruE/dKiuz8bdg2SPJZv2k3vzCwOFhUzYUgHLml8WtgliSSUhA0TkYqycP0O+o3JplrVKrw2tCPnNTgl7JJEEo7CRFJa1hdbGTQuh9PSqjFhcAZN62gtEpFvQmEiKWvass18b/w8Gp+exvhBGTSodWLYJYkkLIWJpKS3Pv2KB15bwPkNT2HcgHTqaC0SkXJRmEjKmZy9lmFTFtG+6elk9o9w6olai0SkvBQmklIyZ37BL//+GVeeW48Xe7fjpOqa+VekIihMJCW4O797fznPfrCSG1o35Hd3XkL1E+K2AoNIylGYSNIrLnaeeHspL89awx2RRvzqtjZU1cy/IhVKYSJJrbComJ++uYg35q1n4OXNeeyGCzSFvEgcKEwkaR0oLOL+yQt4Z3Eu93c7hx91PUdTyIvEicJEklL+wULufnUeM1ds4ec3XsigK5qHXZJIUlOYSNLZtb+AgWPnMn/tdp66vQ13tG8cdkkiSU9hIkll654D9B2TzfJNu/lDj0u5oU3DsEsSSQkKE0kaG3fuo3dmFht27GNU3whdzqsfdkkiKeOwH7Q3s/qltrX2rVRKa7bs5TsvzGbTrgO8MjBDQSJynB0yTMzsRqIrGcZ60Mxujm9JIsdmWe4uvvvSbPYVFDFpSAfSm58edkkiKedwVyY/IrrcbqzhwH3xK0fk2CxYt4M7X5pDFYM/3d2B1o1qhV2SSEo63JhJFXffG7vD3fNNH9SXSmLWqi0MGZdDnZo1mDA4g8anp4VdkkjKOlyYHGqVoMRcNF6SytSlm/j+xPk0q5PGq4MyOONUrUUiEqbD3eb6zMw6x+4wsw7A6viWJHJ4/1y8ke+Nn8cFDU7htaEdFSQilcDhrkyGAX8xsxHALCACPAJoAF5C88/Fudw78RPaNKrFuIHpnKK1SEQqhUNembh7LnAt0Aj4OdASuMHd845TbSL/4b0ludw7cT6tFSQilc5hv7To7juB3x2nWkQOaerSTfxg4nxanaUg
EamMDhkmZjaF/x5srwUMc/fsI53YzJ4ErgjOMczdZ8Uc6wn8ECgEprv7z4P9I4B2QHVguLu/Y2bdgZ8F51kI3BPU/QJwIbAfeNzdPzyqHkvC+ddnm7hnwjwuPLMWrwxK1zK7IpXQIcPE3W8tvc/MGgBvApcd7qRm1hVIc/fOZlYLmGJm17p7QdCkD3A9sAN4wczaAC2Abe7exczSgPfN7F2iwdIFOABMAuoDdYBi4HKgJvA6oDBJQtOWbeae8fO5oOGpvDJQQSJSWR3TuqXBOErBERtCVyAzeM5OYAbQOub4CGAdsAa4KXi8AngxeE4+sB4wd38DaAhsAGq4e667LyEaMl8CG4FVx9IPSQzTP9/M3a/O49wGNXl1YAa1TlKQiFRWxzTRo5ldRvRq4kjqAJtitnOJXlEQXHUMIzqgvwm4EigKAqLk53QHZrt7EYC7rzazJsBwM+sN7Aa2Ak2B04CLjqUfUvnNWJ7H0Ffncc4ZNRk/KINaaQoSkcrsaMdMLHicC9x7FOfdCtQDSj751QCYGzxuBUwLrnIws5VEP3L8aLB9FdDJ3X9iZjWA5u6+zN0Lzew5okFkwJPuXgxsM7MrzWyTuy8v1YehwFCAJk2aHEXZUhl8uDyPIa/k0LJeTSYMzuC0tOphlyQiR3C4jwbf6u63Bf9uBXoBnwNjj+K8HwD9AYIxk87AouDYKiDdzEqCrDuQH7TtTPS218PBsQKiYyol82TcCmQH5/pW8JwaQDeiA/Gl+zDS3SPuHqlXTxMeJ4KPVmxhyCs5nK0gEUkoR7zNZWanEP3k1WDgSaLfPTksd59qZt3MbEawaxgQMbO27v68mY0DpgXTfK0DBptZJ+AtYEHMsQHAE8C7ZlYIfAo8CFQF/mhmfYE04CV3X3sM/ZZK6OOVWxg0bi7N657MhMEZ1D5ZQSKSKMy97Km2zKwu8ADQARgN9Hf3a45jbRUqEol4Tk5O2GXIIcxatYWBL8+l6eknM3FIBnVq1gi7JBEBzGyeu0eO1O5wn+b6X6K3lX7u7hOJfidEpMLNXrWVgS/PpcnpaUxQkIgkpMONmdwLfBvobWYfEHwaS6QiZX0RDZLGtdOYOKQDdRUkIgnpsN8zcfe17v59ooPvH5rZR8EKjCLllr16GwNenstZtU9SkIgkuKP60qK7b3T3B4ne9uoY35IkFcxds43+Y7NpWOtEJg7JoN4pChKRRHZMX1oMZgx+NE61SIqY9+U2+o/JpsGpJzJpSAfqn6L1SEQS3TFNpyJSXvPXbqffmLnUP/VEJg3tQH0tbCWSFBQmctx8snY7/UZnU7dmdSYN6aAVEkWSiMJEjosF63bQd3Q2p9eszqShHWhQS0EikkwUJhJ3C9fvoM/oLGqfHL0iaVjrpLBLEpEKpjCRuFq0fie9M7M4La0ak4Z24MzTFCQiyUhhInGzeMNOeo/O4tSTqjFpSAfOUpCIJC2FicTF4g076ZWZRc0aJzBpSAca1U478pNEJGEpTKTCLf1qF71HZ3Fy9apMHtqBxqcrSESSncJEKtRnG3fRK3MOJ1WryuShHRUkIilCYSIVZlnuLnplZlHjhOgVSZM6ChKRVKEwkQrxee5ueo7KolpVY/LQDjStc3LYJYnIcaQwkXJbvmk3PUfN4YQqxuShHWlWV0EikmoUJlIuK4IgqVrFmDS0A80VJCIpSWEi39jKzXvoMSoLM2PikA6cXa9m2CWJSEgUJvKNrMrbQ49RcwCYNCSDlvUVJCKpTGEix+yLvD30GDkHdw+C5JSwSxKRkClM5Jis3rKXHqPmUFTsTBzSgXPOUJCIyDGutCipbc2WvfQYOYeCImfSkA6cqyARkYCuTOSofLk1ekVyoLCIiUMyOK+BgkRE/k1hIke0dms+PUbOYX9BERMGd+D8BqeGXZKIVDJxCxMze9LMPjSzGWZ2WaljPc1stpnNNLPhMftHmNl0M5tlZtcF+7qb2Zyg/UtmVsXM0oN2Jf82m1ntePUlla3b
lk+PUXPILyhi/OAMLjxTQSIi/y0uYyZm1hVIc/fOZlYLmGJm17p7QdCkD3A9sAN4wczaAC2Abe7exczSgPfN7F2gOtAFOABMAuq7e3awDzO7Auji7tvj0ZdUtn57PneNnMOeA4VMGJzBRWfWCrskEamk4nVl0hXIBHD3ncAMoHXM8RHAOmANcFPweAXwYvCcfGA9YO7+BtAQ2ADUcPfcUj/rAeCZOPUjZW3YsY+7Rs5h9/4Cxg/KoNVZChIRObR4hUkdYFPMdi5QHyC46hgGtASaAb2AIndf4u5bgzbdgdnuXgTg7quBJsAyM+tdctLgVtjH7r6nrCLMbKiZ5ZhZTl5eXgV3MXl9tWMfd42czc59BYwfnEHrRgoSETm8eIXJVqBezHYDYHPwuBUwzd1z3d2BlcAjJQ3N7Cqgk7s/Y2Y1zOx8AHcvBJ4DOgbtDLgHeP5QRbj7SHePuHukXr16h2omMTbujF6R7NgbvSJp0+i0sEsSkQQQrzD5AOgPEIyZdAYWBcdWAelmVjJe0x3ID9p2Jnrb6+HgWAHRMZWShTFuBbKDx98B/uHu++PUh5STu3M/d42cw/a9B3llUDoXN1aQiMjRicsAvLtPNbNuZjYj2DUMiJhZW3d/3szGAdOiFxesAwabWSfgLWBBzLEBwBPAu2ZWCHwKPGhmVYmG1S3xqD8V5e7cT49Rc9i6JxokbZvow3EicvQseqcpsQRXNWe7++dH+5xIJOI5OTlxrCpxbd4VvSLZtGs/rwzKoF1TBYmIRJnZPHePHKldQk6nEoyfHHWQyKFt3r2fu0ZFg2TcwHQFiYh8IwkZJlIx8nYfoMfIOeTujAZJpNnpYZckIglK06mkqLzdB+g5ag4bd+7n5QHptFeQiEg5KExS0JY9B+iVOYf12/cxpn970psrSESkfBQmKWbrngP0GpXF2m35jOnfng4t6oRdkogkAYVJCtm29yC9MrP4cttexvRrT8ezFSQiUjEUJili+96D9Bw1h9Vb9jK6X3sua1k37JJEJIkoTFLA9uCKpCRILleQiEgF00eDk9yO/IP0Hp3Fyrw9ZPaNcMU5ChIRqXi6MkliO/ML6D06ixWb9zCqb4TO52qySxGJD4VJktq5Lxoky3P38FKfdlypIBGROFKYJKGd+wroOzqLZbm7eLHPpVx1Xv2wSxKRJKcwSTK79hfQd0w2Szfu4oVe7bj6/DPCLklEUoDCJIns3l9AvzHZLP1qJ8/3ake3CxUkInJ8KEySxJ4DhfQbk82i9Tv5Y89L+ZaCRESOI4VJEthzoJD+Y7JZGATJtRc1CLskEUkx+p5Jgtt7oJABY7P5ZN0O/tijLd9upSARkeNPVyYJLBokc5m/dgfP3tWW61o3DLskEUlRCpMElX+wkIEvz2Xe2u38/q5LuKGNgkREwqMwSUD7DhYx8OW5zF2zjd/deQk3tjkz7JJEJMUpTBLMvoNFDBo3l+zV0SDpfrGCRETCpzBJIPsLihj8ylzmfLGVp++4hJsvOSvskkREAH2aK2HsLyhiyCs5zFq1lRHfvZhb2ipIRKTySPgrEzMbY2anhV1HPJUEyUcrt/Cb71zMbZc2CrskEZH/ELcwMbMnzexDM5thZpeVOtbTzGab2UwzGx6zf4SZTTezWWZ2XbCvu5nNCdq/ZGZVYtrfAqx09x3x6kfY9hcUcfer8/ho5Raeur0N32mnIBGRyicut7nMrCuQ5u6dzawWMMXMrnX3gqBJH+B6YAfwgpm1AVoA29y9i5mlAe+b2btAdaALcACYBNQHcoPz9gduj0cfKoMDhUV8b/w8ZizP46nb2/DdSOOwSxIRKVO8rky6ApkA7r4TmAG0jjk+AlgHrAFuCh6vAF4MnpMPrAfM3d8AGgIbgBrunhuc41dEA+g9M7sgTv0IzYHCIu4ZP5/pn+fx69tac0d7BYmIVF7xCpM6wKaY7VyiVxQEVx3DgJZAM6AXUOTuS9x9a9CmOzDb3YsA3H010ARYZma9
zawxcCYQAQYBv49TP0JxsLCYH0yYzwfLNvPkra25K71J2CWJiBxWvMJkKxC7tF8DYHPwuBUwzd1z3d2BlcAjJQ3N7Cqgk7s/Y2Y1zOx8AHcvBJ4DOgLpwJ/c/aC7rwF2mFm10kWY2VAzyzGznLy8vIrvZRwcLCzmBxPnM/Wzzfzyllb0zFCQiEjlF68w+YDoeAbB2EZnYFFwbBWQbmYl4zXdgfygbWeit70eDo4VEB1TSQu2bwWyg3NcE3P+2jHjMV9z95HuHnH3SL16lX/Z2oKiYu6dOJ/3l25i+M0X0btD07BLEhE5KnEZgHf3qWbWzcxmBLuGAREza+vuz5vZOGCamUF0vGSwmXUC3gIWxBwbADwBvGtmhcCnwIPuXmxmK8xsFtGB+YdJcAVFxfxw4ie8t3QTv+h+EX06Ngu7JBGRo2bRO03JLxKJeE5OTthllKmgqJj7Jn3CO4tzefymCxlwefOwSxIRAcDM5rl75EjtEv5Li4musKiY+ycv4J3Fufz8RgWJiCQmhUmICouKuf+1Bfx90UYeu+ECBl2hIBGRxKQwCUlhUTEP/OlT3l64kUevv4DBnVqEXZKIyDemMAlBUbHz49c/5a1Pv2LYdeczpLOCREQSm8LkOCsqdh56/VP+uuArHvn2+dx95dlhlyQiUm4Kk+OoqNj5yeufMuWTDfzk2vO4p4uCRESSg8LkOCkqdh5+YyFvfrKBh645lx9c1TLskkREKozC5DgoLnZ++ueF/Hn+eh781rnce/U5YZckIlKhFCZxVlzsDHtzEa/PW8/93c7hvq4KEhFJPgqTOCoudh79yyJey1nHfV3P4f5u54ZdkohIXChM4qS42Hnsr4uZlL2Oe69qyQPddEUiIslLYRIH7s7//G0xE7PW8v0uZ/Pja84lmLhSRCQpKUwqmLvz+N+WMH7OWr535dn85NrzFCQikvQUJhXI3fnFW0t5ZfaX3N25BY98W0EiIqlBYVJB3J0n3l7Ky7PWMKRTc3563fkKEhFJGQqTCuDuDH/7M8Z+vIZBVzTnZ9dfoCARkZSiMCknd+f//v4ZYz5ezYDLm/HYDQoSEUk9CpNycHd+9c4yMj9aTf/LmvE/N16oIBGRlKQw+YbcnV//cxkjP/yCvh0Lz2jaAAAI3klEQVSb8vhNChIRSV0Kk2/A3Xnq3c95acYX9O7QhF90v0hBIiIpTWFyjNyd3773OS9MX0WvjCY80b2VgkREUp7C5Bi4O0+/v5znpq2iR3oTht/ciipVFCQiIgqTY/DM1BX84YOV3NW+Mf93i4JERKSEwuQo/X7qCn7/rxXcEWnEk7e2VpCIiMRQmByFP/xrBb+bupzvtGvEr29royARESklbmFiZk+a2YdmNsPMLit1rKeZzTazmWY2PGb/CDObbmazzOy6YF93M5sTtH/JzKoE+28wsyVB+9fj1Y/np69kxPvLuf3SRvy/2xUkIiJlOSEeJzWzrkCau3c2s1rAFDO71t0LgiZ9gOuBHcALZtYGaAFsc/cuZpYGvG9m7wLVgS7AAWASUB/IBdoDPdx9YTz6UKJZnZP5brtG/Pr2NlRVkIiIlCkuYQJ0BTIB3H2nmc0AWgPzg+MjgHXA1qCGYUARMDN4Tr6ZrQfM3d8ws+bAx0CWu+cG54gAl5lZEfCQuy+JR0eub92Q61s3jMepRUSSRrxuc9UBNsVs5xK9oiC46hgGtASaAb2AIndf4u5bgzbdgdnuXgTg7quBJsAyM+sdnLO/u18DDAGeKasIMxtqZjlmlpOXl1fBXRQRkRLxCpOtQL2Y7QbA5uBxK2Cau+e6uwMrgUdKGprZVUAnd3/GzGqY2fkA7l4IPAd0DLa3BP+7HthuZtVKF+HuI9094u6RevXqlT4sIiIVJF5h8gHQHyAYM+kMLAqOrQLSzazkFlt3ID9o2xm4CXg4OFZAdEwlLdi+Fcg2s++XXKGYWQPgtJjxGBEROc7iMmbi7lPNrFswVgLR21oRM2vr7s+b
2ThgWjANyTpgsJl1At4CFsQcGwA8AbxrZoXAp8CDQA3gVTMbTHSs5f549ENERI6ORe80Jb9IJOI5OTlhlyEiklDMbJ67R47UTl9aFBGRclOYiIhIuSlMRESk3FJmzMTM8oAvv+HT6wJbKrCcMKkvlU+y9APUl8qqPH1p6u5H/G5FyoRJeZhZztEMQCUC9aXySZZ+gPpSWR2Pvug2l4iIlJvCREREyk1hcnRGhl1ABVJfKp9k6QeoL5VV3PuiMRMRESk3XZlIwjGzMWZ2Wth1lFey9EMEFCZlKr2Ko5ldaGbTgu3ny5qhuLIqa0XK4HHJvxvCrvFYmNktwEp335Hgr8vX/Qi2E/Y1kcrDzKqb2ehgZdppZtbZzM40s38Eq95ONLNT4vGz47U4VqL7j1UczewfQF93X2dmPwAGAi+FWeAxKN2X6sBGd+8RblnHLpiBuj9we7DrtyTg61K6H4n6mphZOvBUzK4Lic7s/UvAgKXAjxJhRu9D9OU8YErMvt+4+9+Pa2HH7hygGLgcqAm8TnQ9qcfcfX7wRuVnRCffrVAaMymDmb1NdLngIuCnRF+I7wbHqgETS7Yru1J9eQhIAyYAXwEz3P3xEMs7Jmb2PHAFkEd09uiEfF1K9eNeon/0CfmalDCzK4gur30ZcHdMwBe6e6UP+FgxfXkKGJeAIT8OuBo4HRgHNHL37jHH/+ruN1f0z9VtrrLFruL4W/69sBfBu6xEuqIrvSLl58BF7t4FqGJmtx/uyZWFmTUGziS6XPMgEvR1KaMfvydBX5NSHgCeBfa6+7pg30igW3glfWMPEP1buRhoF9x6/EXINR0VM7uZ6OKETYHGwCSibyRjFcfjZytMylBqFcfdRFeKBL5+B1z6xam0Sq9ICeyLue0wGWgbVm3HKB34k7sfdPc1JO7rUrofO0jc1wQAM7sO+Bg4mQQM+FglfXH3PSRmyF8LPO3uxe6+DbiS6PpPseLy//sKk1LKWMWxJlDDzBoGTYYA74dV37Eoa0VK4B0zK/nlugOYF1Z9x2gVcA18PeZwCon5upTuR20S9zXBoqvY3QM8T/Qdcd2YY4kS8MB/9QV335WAIb8I+BZA8DvVDTjJzFoH+64nOpZV4RLqXcNxMpb/XsXRgQlmVgVYQuKs7FhWX1oA082sgOg7sCmHO0Fl4e4LzGyFmc0CDhBd2nkfCfa6HKIfZ5KAr0ngO8A/3H0/gJmdbGYN3X0jiRPwJb7uS/A79R5wg7sfIHFCPhP4o5n1JTo++hLRfoy06PLnG4DvxeMHawBeRL4RM6sK/A24peQdvJm1Ijp28nXAJ8inucrqy81EP4BTEvIV/gmoZKIwEZFvxMxOAM5298/DrqW8kqkvYVGYiIhIuWkAXkREyk1hIiIi5aYwERGRclOYiIhIuSlMREJiZn8JuwaRiqIwERGRctM34EVKMbMuwA3A2cAZwGqiswm0dvdngja3EJ2eZjownOi0O02AfwEziM4GfBZwr7t/eISf1xD4MdFpzw34CPiVuxebWR/gbqIzGHwO1Cc6ff2vgzZfBtOAvEV0+vfmwH1EpyI/SPQb3S+YWXfgcaLTkfd29+3l+o8kUorCRKRsVwNXu/tOMytZn+NQLgfS3X1LsABZvrtfZ2anAy8Chw0TojPUnhGzfSswz8x2EJ1b6Sp3LzCzc4lOqAjwG6IBdB9wC/B20GYU0el/IDrB391mNhPoSHQajSXunn9U/wVEjoHCRKRsb7j7zuDxfKLrdOyJOX5SzOP3S2ZnJjqJ3nsA7r7tKFd/rBPMTAtEJ0gMguHHwOiS6T3cfXkQDLj7KjM70czOBPoBd5pZHaKLbN1Vxrl+AQwlOqX6S65vK0sF05iJSNn2xTwuJrpw1cXw9cqIfQ7R1oHYd/6Hu6L5+vxm1i0494nAn8ysPtGrkAElgWRm5wGdYp43guhKeh+4+wF33wpcEMyPhZnVBv4S1Huiuz9LdMGk2HOIVAhdmYgcnUIgz8xmA9v4
9+2mijAQ+IOZPUb0b3K4u28GNpvZOcAHZlYELAZmljzJ3T83s2xgVMy5egJPx0xp/2N3P2hmPwgmLiwGnqvA2kUAzc0lIiIVQFcmInFmZv2JfgIr1svu/vJxL0YkTnRlIiIi5aYBeBERKTeFiYiIlJvCREREyk1hIiIi5aYwERGRcvv/i78Nl1+wBXQAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# plot CV误差曲线\n",
    "test_means = grid_search.cv_results_[ 'mean_test_score' ]\n",
    "test_stds = grid_search.cv_results_[ 'std_test_score' ]\n",
    "train_means = grid_search.cv_results_[ 'mean_train_score' ]\n",
    "train_stds = grid_search.cv_results_[ 'std_train_score' ]\n",
    "\n",
    "n_leafs = len(num_leaves_s)\n",
    "\n",
    "x_axis = num_leaves_s\n",
    "plt.plot(x_axis, test_means)\n",
    "plt.xlabel( 'num_leaves' )\n",
    "plt.ylabel( 'AUC' )\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （4）调参min_child_samples\n",
    "\n",
    "    1. 推导过程，由于叶子节点数目为70，一共2分类，平均每类35个叶子节点。\n",
    "    2. 每棵树 稀有事件 的样本数目约为：8万 * 4/5(cv) * 0.014(bootstrap-subsample) ≈ 896。\n",
    "    3. 所以每个叶子节点的样本约为：896/35 ≈ 25，故其搜索范围10-50。\n",
    "\n",
    "    此时，eta=0.1、n_estimators=42、max_depth=6、num_leaves=70。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 9 candidates, totalling 45 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.\n",
      "[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:    8.6s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8263499358144833\n",
      "{'min_child_samples': 40}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Done  45 out of  45 | elapsed:   31.5s finished\n"
     ]
    }
   ],
   "source": [
    "# 1、设置参数范围\n",
    "params = {'boosting_type': 'goss',\n",
    "          'objective': 'binary',\n",
    "          'is_unbalance':True,\n",
    "          'categorical_feature': [0,1,3,5,6,12,15,16,17,18,19,20],\n",
    "          'n_jobs': 4,\n",
    "          'learning_rate': 0.1,\n",
    "          'n_estimators':n_estimators_1,\n",
    "          'num_leaves': 70,\n",
    "          'max_depth': 6,\n",
    "          #'subsample': 0.7,\n",
    "          #'bagging_freq': 1,\n",
    "          'colsample_bytree': 0.7,\n",
    "          #'verbosity':5\n",
    "         }\n",
    "\n",
    "min_child_samples_s = range(10,55,5) \n",
    "tuned_parameters = dict( min_child_samples = min_child_samples_s)\n",
    "\n",
    "\n",
    "# 2、交叉验证找最优参数\n",
    "lg = LGBMClassifier(silent=False,  **params)\n",
    "grid_search = GridSearchCV(lg, n_jobs=4, param_grid=tuned_parameters, cv = kfold, scoring=\"roc_auc\", verbose=5, refit = False)\n",
    "grid_search.fit(X_train , y_train)\n",
    "\n",
    "\n",
    "# 3、打印结果\n",
    "print(grid_search.best_score_)\n",
    "print(grid_search.best_params_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**画图**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/apple/anaconda3/lib/python3.7/site-packages/sklearn/utils/deprecation.py:125: FutureWarning: You are accessing a training score ('mean_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n",
      "/Users/apple/anaconda3/lib/python3.7/site-packages/sklearn/utils/deprecation.py:125: FutureWarning: You are accessing a training score ('std_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZMAAAEICAYAAACavRnhAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8VPW5+PHPM1kJhIQlECBh32RJQKMiSgRBFlHU1rbX7V7bWnvbuv9al/Z6a2trW6+0alu32ttrF2vrrqhEkFXBBRQSwiI7BJIQAgmB7DPP7485oWNMIJCZOTPJ8369eHnmnDPnPDNO5pnvLqqKMcYY0x4etwMwxhgT/SyZGGOMaTdLJsYYY9rNkokxxph2s2RijDGm3aImmYjIBBG5z+04jDHGfFHIkomIPCgiK0RkuYhMbnbsGhFZLSIrReSBgP3zRWSZiKwSkTkB+2OAnwOPB+ybKyKFzvkvhOp1GGOMObnYUFxURKYDSaqaKyIpwCsiMktVG5xTrgcuASqAJ0QkCxgKHFLVqSKSBCwSkTxV9QF3AE+pannAbc4GrlbV/FC8BmOMMW0XkmQCTAeeAVDVShFZDowHPnGOzwf2AuVODPcCXmCl85xqESkCREQGAfcAG0TkLFX9sXONHGCyiHiB76tqYYheizHGmJMIVTLpBZQGPC4B+gA4pY57geHOORcC3sBkICLzgNWq6hWRu4ErVXWliDwsIvNU9XXgBlU9KCIZwJ+Ai08UUO/evXXw4MHBe4XGGNMJrF279qCqpp3svFAlk3IgDShzHqcDHzvb44ClqloCICLbgLuBHzmPpwFTVPUHTc9V1ZXO9p+Aa4DXVfUggKoWichhEYkLqEbDudZNwE0AAwcOZM2aNcF/pcYY04GJyO62nBeqBvglwA1OIClALlDgHNsOnCMiTYlsHlDtnJsLXAbcFXCtRhEZ7WzPBApE5Lsicp3znHQgtXkiAVDVp1U1R1Vz0tJOmliNMcacppCUTFR1sYjMcNpKwF+tlSMiE1X1cRF5FlgqIuBvO7lRRKYAbwDrAo593Xnu006PrgLgESAR+IuI3Ii/reX2ULwOY4wxbSOdZdbgnJwctWouY4w5NSKyVlVzTnZe1AxaNMYYE7ksmRhjjGk3SybGGGPazZKJMcaYdrNkYoyJGjX1Xv7+0R58vs7RcSiaWDIxxkSNv324m3tfLmD1jvKTn2zCypKJMSZqLMgvBmB9UYXLkZjmLJkYY6LC3kPVrNvrTyL5eytdjsY0Z8nEGBMV3irwl0omDky1kkkEsmRijIkKC/KLycpIYe74fhRX1nKgqtbtkEwASybGmIi36+AxCvZVcmlWPyZkpgJW1RVpLJkYYyLem04V19ys/oztn0KMR6yqK8KEaj0TY4wJmgX5xZw5MJUBqV0AGNGnG+uLrGQSSaxkYoyJaNvLjrKp+Ahzs/of3zchM5X8ogo6y6zn0cCSiTEmoi1YX4wIzB3f7/i+rIxUKqob2HOo2sXITCBLJsaYiPZmwX7OHtST9JTE4/uyMlIArKorglgyMcZErM9Kq/is9CiXZvf73P5R6ckkxHrI32uN8JHCkokxJmItWL8fj8Dscemf2x8X42Fs/+7WoyuCWDIxxkQkVWVBQTHnDulFn+TELxzPykhlw74jNHp9LkRnmrNkYoyJSJuKq9hRduwLVVxNJmSmUtPgZVvZ0TBHZlpiycQYE5EW5O8nxiPMHpve4vGmRngbCR8ZLJkYYyKOqrIgv5jJw3rRq1tCi+cM7tWV5MRY1lm7SUSImmQiIhNE5D634zDGhN6GfUfYc6iaS7NaruIC8HiErIwU8i2ZRISQJRMReVBEVojIchGZ3OzYNSKyWkRWisgDAfvni8gyEVklInMC9scAPwceb8v1jTHRbUH+fmI9wqxWqriaZGeksrm4itoGb5giM60JydxcIjIdSFLVXBFJAV4RkVmq
2uCccj1wCVABPCEiWcBQ4JCqThWRJGCRiOSpqg+4A3hKVcvbeH1jTJRqquKaMqI3qUnxJzw3KyOVRp+ysfgIZw7sEaYITUtCVTKZDjwDoKqVwHJgfMDx+cBeYBdwmbO9FXjSeU41UASIiAwC7gHuFJGftPH6xpgo9eneCvZV1HxuLq7WZGc2NcJbVZfbQpVMegGlAY9LgD4ATqnjXmA4MBi4FvCqamFAyWMesFpVvcDdwJWqOhXo6hxr9frGmOj2Zn4x8TEeZo7te9Jz07sn0ic5gXybVsV1oUom5UBawON04ICzPQ5Yqqol6p/ycxv+hAGAiEwDpqjqI03PVdWVzvafgHNPcv3jROQmEVkjImvKysqC8LKMMaHk8ylv5heTOzKN7olxJz1fRMjKSLUeXREgVMlkCXADgNOmkQsUOMe2A+eISFN7zTyg2jk3F3+1110B12oUkdHO9kznOie6/nGq+rSq5qhqTlpaWvPDxpgIs3bPYUqO1HJZKwMVW5KdkcKOsmMcqbUmUzeFpAFeVReLyAwRWe7suhfIEZGJqvq4iDwLLBUR8LeX3CgiU4A3gHUBx77uPPdpp0dXAfCIqmrz61vjuzHRb8H6/STEeph+xsmruJpkO8v4biiqZPLw3qEKzZxEyFZaVNV7Wti92jn2EvBSs2MrgZRWLje9jdc3xkQpr095a0MJ00b1oVtC27+amkbCryuqsGTioqgZtGiM6dg+2nmIsqq6Vufiak1qUjyDeiXZtCous2RijIkIC/L30yUuhotGn3rHzOyMVBsJ7zJLJsYY1zV6fSzcUMJFZ/QhKf7Ua9+zMlLYX1nLgaraEERn2sKSiTHGdR/sOET5sXouO8FcXCfS1AhvVV3usWRijHHdgvz9dI2PYeqo0xt7PLZ/d2I8YlVdLrJkYoxxVYPXx8LCEi4e05fEuJjTukZSfCwj+nRjnY2Ed40lE2OMq97bdpCK6oY2zcV1Ik2N8P6JNUy4WTIxxrjqzfxikhNjyR3ZvjEi2ZmpVFQ3sPdQTZAiM6fCkokxxjV1jV7yCkuYOSadhNjTq+Jq0jR4cb21m7jCkokxxjUrPztIVW3jKQ9UbMmo9GQSYj2st+noXWHJxBjjmjcLiknpEsf5w9o/DUpcjIex/bvbdPQusWRijHFFbYOXRRtLmT02nfjY4HwVZWWkUrCvkkavLyjXM21nycQY44plW8o4WhecKq4m2Zkp1DR42VZ2NGjXNG1jycQY44oF+fvp2TWe84b2Cto1szJsJLxbLJkYY8Kupt7Lu5sOMHtcOrExwfsaGtKrK8mJsdajywWWTIwxYbdk8wFqGrxceppzcbXG4xGyMlIsmbjAkokxJuwW5O8nLTmBc4cEr4qrSVZGKpuLq6ht8Ab92qZ1lkyMMWF1tK6RJZsPcMm4dGI8EvTrZ2ek0uhTNhUfCfq1TessmRhjwurdTaXUNfraPRdXa7IznZHwNngxrCyZGGPCakF+MendE8kZ1CMk10/vnkhacoINXgwzSybGmLA5UtvA8i1lXDK+H54QVHEBiAjZGanWCB9mlkyMMWGzqLCUeq8vqAMVW5KdkcL2smMcqW0I6X3Mv1gyMcaEzZsFxQxI7cJEZ5ndUMlyrr/BqrrCxpKJMSYsKqsbWLm1jLlZ/RAJTRVXk+zj09FbMgmXkCUTEXlQRFaIyHIRmdzs2DUislpEVorIAwH754vIMhFZJSJzAvYvC/g319k3V0QKnX0vhOp1GGOCI6+whAavBn2gYktSk+IZ1CvJenSFUWwoLioi04EkVc0VkRTgFRGZpapNFZjXA5cAFcATIpIFDAUOqepUEUkCFolInhNjsape3ew2ZwNXq2p+KF6DMSa43sjfz8CeSYwfkBKW+2VlpLJ216Gw3MuErmQyHXgGQFUrgeXA+IDj84G9wC7gMmd7K/Ck85xqoAgQIBs4yymB/CTgGjnAwyLytoiMDdHrMMYEwaFj9azaXh6WKq4m2Rkp7K+spayq
Liz36+xClUx6AaUBj0uAPgBOqeNeYDgwGLgW8KpqoaqWO+fMA1arqhfYAoxV1amAR0S+7FzzBlWdCXwLeKSlIETkJhFZIyJrysrKgvwSjTFttXBDCV5feKq4mmQ7jfD51kU4LEKVTMqBtIDH6cABZ3scsFRVS1RVgW3A3U0nisg0YIqqPgKgqkcCqseeByY6+w86/y0CDotIXPMgVPVpVc1R1Zy0tLTmh40xYbIgfz9De3dlTL/uYbvn2P7d8YiNhA+XUCWTJcANAE6bSS5Q4BzbDpwjIk3tNfOAaufcXPzVXnc5jz0islhEEpxzvwqsFZHvish1zjnpQGpAwjHGRJCyqjo+2FHOpWGs4gJIio9lZN9k69EVJiFpgFfVxSIyQ0SWO7vuBXJEZKKqPi4izwJLnQ/WXuBGEZkCvAGsCzj2deC3wDIRaQDeV9VXRKQL8BcRuRHwAreH4nUYY9pv4YZifErI5uI6keyMVN7ZWIKqhjWRdUYhSSYAqnpPC7tXO8deAl5qdmwl0FI3j53Aa82uXQNcFYQwjTEh9kZ+MSP6dGNUenLY752VmcI/1uxl76EaBvZKCvv9OxMbtGiMCZnSI7V8vOsQl7pQKgF/yQSwebrCwJKJMSZk3iooRhXmhrEXV6BR6ckkxHqsR1cYWDIxxoTMgvxiRqcnM7xPN1fuHxfjYUz/7qzfa43woWbJxBgTEvsrali7+zCXZbtTxdUkOyOVDfsr8frU1Tg6OksmxpiQeDO/GCCsAxVbkp2ZQnW9l20HjroahxuKK2uY+9hKPg7DtDKWTIwxIbGgoJjxA1IY1Kurq3FkNTXCd8LBi+8UllK4/wg9kuJDfi9LJsaYoNt7qJr1eytca3gPNKRXV5ITYjtlj668whKGpXUNS5uVJRNjTNAtcKq45o53P5l4PEJWZkqnWxP+8LF6Ptx5iFlj08NyP0smxpigW5C/nwmZqWT2jIyBglkZqWwqPkJtg9ftUMLm3c0H8PqU2eMsmRhjotDOg8co3H/E9Yb3QNkZKTT6lE3FR9wOJWzyCkvon5IYtvVjLJkYY4Lqzfz9AFwSAVVcTf41HX3nqOqqrm9kxWdlzBybHrY5ySyZGGOCakF+MTmDetA/tYvboRyX3j2RtOSETtOja/mWMuoafcwc2zds97RkYowJmm0HqthcUhVRVVwAIkJ2Rkqn6dGVV1hCj6Q4zhncM2z3tGRijAmaBfnFiMCcCKriapKdkcqOg8eoqu3YSx/VN/p4d/MBZpzRl9iY8H3FWzIxxgSFqrIgv5hzBvekb/dEt8P5gqzMVFShYF/HbjdZvaOcqtrGsHUJbmLJxBgTFFtKq9h24CiXujwXV2uynF5NHX3Sx7zCEpLiY7hgRO+w3teSiTEmKN7ML8YjMCdM4xpOVY+u8QzqldShp6P3+ZRFG0uZOiqNxLiYsN7bkokxpt2aqrjOG9aL3t0S3A6nVVkZqR26e/Cnew9TVlUX9iousGRijAmCwv1H2HnwmGsrKrZVdkYK+ypqKKuqczuUkMgrLCUuRpg2uk/Y723JxBjTbgvyi4n1CLNd+EV8Kv41eLHjVXWpKgs3lDB5WG+6J8aF/f6WTIwx7aKqvFmwn/OH96ZH19BPdd4eY/t3xyOwvgNWdW0uqWLPoWpXqrjAkokxpp3yiyrZe6gmIqabP5mk+FhG9k3ukCPh8wpLEIGLx4Rv1HsgSybGmHZZkL+fuBhh1pjIruJqkp2RSn5RBaodaxnfvMJScgb1IC3ZnQ4QIUsmIvKgiKwQkeUiMrnZsWtEZLWIrBSRBwL2zxeRZSKySkTmBOxfFvBv7smub4wJD59PeTO/mNwRaaQkhb+e/nRkZaZwuLqBosM1bocSNHvKq9lUfMS1Ki6A2FBcVESmA0mqmisiKcArIjJLVZvmMbgeuASoAJ4QkSxgKHBIVaeKSBKwSETynBiLVfXqU7i+MSYMPt1bwf7KWn4we5TbobRZtrOM77q9FRGz3kp75RWW
ALiaTEJVMpkOPAOgqpXAcmB8wPH5wF5gF3CZs70VeNJ5TjVQBAiQDZzllEp+0sbrG2PCYEH+fuJjPcw4w516+tMxKj2Z+FhPh+rRlVdYwhn9uruaHEOVTHoBpQGPS4A+AE6p415gODAYuBbwqmqhqpY758wDVquqF9gCjFXVqYBHRL58ousHEpGbRGSNiKwpKysL7is0ppPz+ZS3CoqZOjKNZBe6op6uuBgPY/t37zA9usqq6li75zCzwjjdfEtClUzKgbSAx+nAAWd7HLBUVUvU3wK2Dbi76UQRmQZMUdVHAFT1SED11fPAxJNc/zhVfVpVc1Q1Jy0trflhY0w7fLzrEKVH6iJ2Lq4Tyc5IZcO+Sry+6G+EX7SxFFXCtjxva0KVTJYANwA4bRq5QIFzbDtwjog0tdfMA6qdc3PxV3vd5Tz2iMhiEWnqnvBVYO1Jrm+MCYM3C4pJjPMw3YXR1u2VlZFCdb2XbQeOuh1Kuy0sLGFQryRG9U12NY6QNMCr6mIRmSEiy51d9wI5IjJRVR8XkWeBpc5yknuBG0VkCvAGsC7g2NeB3wLLRKQBeF9VXwFofn1rfDcmfLw+5a2CEi4a3YeuCSH5GgmpppHw64sqGJXu7pdwexypbWD19oN8/fwhYVuetzUh+xSo6j0t7F7tHHsJeKnZsZVASgvP2Qm81sbrG2PC4MMd5Rw8Whfxc3G1ZkivriQnxLJ+bwVfzcl0O5zTtnTzARq86np7CdigRWPMaXgjv5ik+BimjYq+Ki4Aj0cYn5ES9TMI5xWWkJacwMTMHm6HcuJkIiJ9mj22VmxjOrlGr4+FG4qZcUZfusSHd82MYMrOTGVzyRHqGr1uh3Jaahu8LNtSxswxffF43K3ighMkExG5FPivZrvvFJHLQxuSMSaSrdpezuHqhqiYi+tEsjNSaPAqm4qr3A7ltKzcepDqeq+rAxUDnahkchv+hvNADwC3hi4cY0ykW5C/n+SEWC4cGd0VFVnOSPhonfQxr7CE5MRYJg3t5XYowImTiUdVjwXucEamu1+eMsa4or7RR15hKReP6Rv2ZWGDrV9KImnJCayPwpHwjV4f724qZfroPsTHRkbT94mi6NrK/ugf5WOMOS3vbztIZU0Dl2ZHdxUXgIiQHaWN8B/tOsTh6gbXByoGOlEy2eQMIjxORCbh76prjOmE3sjfT/fEWC4YHt1VXE2yMlLZXnaUqtroGqb2TmEpCbEeciOoqvFEyeRe4CER+YqIDHAa3n/NF9tRjDGdQG2Dl0WFpcwamx4xVSvtlZ2ZiioU7Iue0omqkldYQu7INJLiI2fAaKufCFUtAWYBGcB9+CdmnKuqNmOiMZ3Qyq0HqaprjMq5uFqTNcA/TjqaqrryiyoprqyNmF5cTU6Y1pzp3X8TpliMMRFsQf5+eiTFMXlYZPQeCoYeXeMZ2DMpqnp05RWWEOMRZpwRWQNGW00mIvIKX2xsT8E/D9ZHIY3KGBNRahu8LN5YyrwJ/YmL6RhVXE2yM1P5ZPdht8Nos7zCEiYN7UlqUrzboXxOq8lEVa9svk9E0oGXAVsm15hOZOnmAxyr90btXFwnkp2Rwhvr91NWVefa+ultte1AFdvLjvEfkwe7HcoXnNJPDKcdJbq6PRhj2m1BQTG9u8Vz7pCebocSdE2DF6Nh5cW8Qv+agDPHRFZ7CZxiMhGRyfjXbTfGdBLV9Y0s2XSAOeP6EdvBqrgAxg3ojkeIipUX8wpLyM5MJT0l0e1QvqCtbSbibJcAN4chLmNMhHh30wFqGrxRPxdXa5LiYxnZNzniSyb7K2rIL6rkrtmj3A6lRW1uMxGRLsBNwJ+AGSGOyxgTIRbk76dPcgJnD+54VVxNsjJSnOVv1fVFplrzTmEJALMjrEtwk5OWWUUkWUR+CBQCx/CPPTHGdAJH6xpZuqWMS8b3IyYCpjkPlayMVA5XN1B0uMbtUFq1sLCEEX26MTStm9uhtOhEU9D3FpGfA68Cu4BtqvqM
qkbn5P/GmFO2eGMp9Y0+LusAc3GdyISAZXwj0aFj9Xy081DEDVQMdKKSyf3AlcB9qvoc0BiWiIwxEWNB/n76pSRGxEp+oTQqPZn4WE/EDl5cvKkUnxKdyURVbwZmA9eJyBIgsoZbGmNCqrKmgeWflTF3fL+IWMkvlOJiPIzp1z1ie3S9U1jCgNQujBvQ3e1QWnXCNhNV3aOq3wWuBVaIyHvOCozGmA5u0cZSGrzaoebiOpEJmals2FeJ1xdZq2wcq2tkxdaDzBzbN2I7B0Abx5moarGq3om/2uu80IZkjIkEC/L3k9GjC9kZKW6HEhZZGSlU13vZXnbU7VA+Z9mWMuobfRFdxQWnPgK+TFV/FKpgjAmF+kYfv3h7ExfNX8aSzaVuhxMVDh+r572tB5mb1S+ifw0HU9NI+HUR1m6SV1hCz67xEd81O2TDWUXkQRFZISLLnZHzgceuEZHVIrJSRB4I2D9fRJaJyCoRmdPCNR8O2B4rItud85eJSHSvIWpCYtfBY1z15CqeWr6DY3WNfOP/1nDXi+s5EmWLIYXTtgNH+eazH9PoU+Z1kiougKG9u5KcEBtRgxfrG30s3XyAGWf0ifiu2SFZWUVEpgNJqporIinAKyIyS1Wb/oKvBy7BPzXLEyKSBQwFDqnqVBFJAhaJSJ6q+pxr3gp8Dfi+c42zgTtU9fVQvAYT/V79dB8/eqWAGI/w5HVnMm10Hx5dvJUnl2/nva0H+Z+vZHP+8N5uhxkxGr0+/rByJ79Z/BlJ8TE8+m8TGNu/c1RxAXg8wvgIW8Z31Xb/GjKRtDxva0JVMpkOPAPH10RZDowPOD4f2It//MplzvZW4EnnOdVAEf5pXJqSUy9gbcA1zgbuFJElIjIlRK/DRKGjdY3c+c913P6PdYzp3523b89l9rh+JMTGcNfs0bz4nckkxsVw7TMf8t+vbaC63nq9bymp4stPrOJXCzczfXQfFt1xIZdPGOB2WGGXlZHKpuIj1DVGxnC6vMISusbHMHlY5P/oCdWaj72AwMrpEpyuxU6p4178KzeWAhcCXlUtbDpZROYBq1XVKyJDgS/hnxPslYBr/lhVD4pID+AlEZmjqnUhej0mShQUVXLr85+yu/wYt00fwS0XDf/C5IRnDuzBm7dO4X/ytvC/7+9k+WdlzP9KNjkRXicdCg1eH08u285jS7bSPTGO319zZoedg6stJmSm0OBVNhVXHR/I6BavT1m0sZSpo/uQGBf5tfihKpmUA4Er3acDB5ztccBSVS1RVQW2AXc3nSgi04ApqvqIiMQBjwETgKXABSLSVOI56Pz3MP4SS2bzIETkJhFZIyJryspsteGOzOdTnlm5gy898T61DV6e+9Yk7rh4ZKuz3HaJj+G/LxvD3781Ca9P+cpTq3nwrU3UNkTGL9JwKNxfyeW/e5/5iz5j9rh+vHNHbqdOJBBZ09F/sucwB4/WR3wvriahSiZLgBsAnDaTXKDAObYdOEdEmkpF84Bq59xc/NVedwGoaoOqXqqq56vqVOA9Vb1RRC515gtDRJKBHGBP8yBU9WlVzVHVnLS0tOaHTQdRVlXH1//vY3725iamjerDW7dOYdLQti0te96wXiy8PZerzxnI0yt2cOlv34uIL5JQqm/08et3tnD5797nQFUdT11/Fr+9eiK9ukX2wlDh0C8lkd7dEiKiR1fehhLiYzxMGxUd310hqeZS1cUiMkNElju77gVyRGSiqj4uIs8CS50uh3uBG512jzeAdQHHvq6qO1u4xVvAPBFZ4Tz+b1WtD8VrMZFt5dYy7viHv3fWA5eP5bpJg065K2u3hFgevHI8s8amc/eL+Vz5+Cq+N3UYN180gvjYjrV+R35RBT94IZ8tpVV8aeIA/vuyMRG3/KubRIQJme43wqsqeRtLOH94L5IT41yNpa1C1WaCqt7Twu7VzrGXgJeaHVuJf435E13zCue/PvzT4ZtOqsHr4+F3tvDU8h2M6NONv954DqPT2zfV
xIUj08i7I5efvFHIY0u2sXjTAX79tex2XzcS1DZ4eWTxVp5esZ205AT+94YcLhrd1+2wIlJWRirvbj5AVW2Da1/kG4uPsPdQDd+bOtyV+5+OjvWzy3QKu8uPcdWTq3lq+Q6uOXcgr998QdC+8FO6xPHrr07gqevP4kBVLZf99j1+v3QbjV5fUK7vhrW7DzP3sZU8uXw7Xzkrk3fuuNASyQlkZaSgCgX73Cud5BWW4hGYMSZ6/j+FrGRiTCi8tm4fP3plAx6Bx689k0vGh6bBeNbYdHIG9eC+1zbwP3lbWLSxlPlfzWZYhK4l0ZKaei/z39nCH9/fSf+ULvz5G+eQOzI66t/dlH28Eb7StS657xSWkDOoJ72jqB3LSiYmKhyra+T7L6zntufXMTo9mbdumxKyRNKkV7cEfn/NmTx29UR2HjzGJY+u5H/f24kvwiYCbMlHOw8x59EVPPPeTq49dyB5d+RaImmjHl3jGdgzybWOGLvLj7G5pIqZY6OnVAJWMjFRYMO+Sm75+6fsKj/GrRcN59bpI1rt8htsIsK87P5MGtKTe14u4KcLNpJXWMLDX8kms2dSWGI4FcfqGnlo4WaeXb2bzJ5deO5b50bFgLdIk5WRwqd73Ekmec7yvNHSJbiJlUxMxFL1jx258vH3qan38tyNk7hz5qiwJZJAfbon8sf/yOGhq7Io3H+E2Y+s4LkP9+AfKhUZVm07yOxHV/DnD3Zzw+TB5N2ea4nkNE3ITGVfRQ0Hj4Z/HPTCDSWM7d89In+snIglExORDh6t4xvO2JELR/bh7dumcN6wto0dCRUR4as5mSy8fQoTBqbyw1cK+I8/fUxxpbvrhlfVNvDDVwq45pkPifV4+Oe3z+P+eWNJireKh9Pl1uDFA0dq+WRPRdSVSsCSiYlA7287yJxHV/L+9nJ+evlY/vDvZ9Gja+SMhcjokcRfvnEuD1w+lo93HmLmb1bw8idFrpRSln9WxqzfrOD5j/bwrSlDeOvWKRE/VXk0GDegOx6BdXvD26PrnY3+WaiiMZnYTxcTMRq8Pn696DOeXL6dYWnd+PM3zuGMfpE5xsPjEa4/bzBTRqTx/RfWc+c/17NwQwk/v3I8acmh74FTWdPAz9/cyD/XFDG8TzfFoJUiAAAacklEQVRe/M5kzhzYsddpD6ek+FhG9EkOe8kkr7CEwb2SGNk3enoNNrGSiYkIe8qruerJ1TyxbDv/dnYmr998fsQmkkCDe3flH98+jx9eMppln5Ux65EVvFVQHNJ7vruplJm/Wc5Ln+zju1OHseCWCyyRhEC2MxI+XCXOyuoGVm8vZ9a49KhckMySiXHd6+v3M/exlewoO8rvrpnIL76UFVX1/TEe4abcYbx5ywVk9OjCd//2Cbf+/VMqqoM7w09FdT13/GMd33x2DT2S4nn1u+dz1+zRUTGjbDTKykjl0LF6ig6Hp01syZZSGn0alVVcYNVcxkXV9Y38+LVCXlhbxJkDU3n03yZGXQ+WQCP6JvPSdybzxLLtPPbuVlbvKOdXXx4flNHmCzeU8F+vbqCiup5bp4/g5mnDO9y8YZGmafDi+qKKsHwu8zaU0ic5gQkZ7k59f7rs02hcsWFfJZf+9j1e/KSIWy4azj+/fV5UJ5ImcTEebp0+gle/dz69usYfXya46jSXCS4/Wsf3nvuE//zrWvp2T+C1m8/nzotHWiIJg1HpycTHesIy6WNtg5fln5Uxc2xfPBG+PG9rrGRiwkpV+dP7u/jl25vp0TWOv93YMQfVjRuQwms3n398meD3t5Xz0FVZbV4mWFVZkF/Mj18vpKq2ge/PHMm3LxxGnAtjbDqr+FgPY/p1Z30YpqNf8VkZNQ1eZo+N3vVkLJmYsCk/WscPXsxnyeYDzDijDw9dlU3PCOryG2xNywRfPKYv/++f67n2mQ/59/MGcc+c0SdsEzpQVct9r24gr7CU7IwU/ucrkxjZNzmMkZsm2RkpvLC2CK9PiQlhiWFhYQkp
XeI4d2j0duu2nzkmLFY5Y0fe23qQ+y8bwx/+PadDJ5JAE51lgr9x/hD+vHo3cx5dyZpdh75wnqryyqdFzPzNCpZuKeOeOaN56TuTLZG4KDszlep6L9vLjobsHg1eH+9uOsD00X2iuuQZvZGbqNDg9fHQws1c+8cP6ZYYy6vfO58bzh8SlV0f26NpmeDnb5qET/3LBP8iYJngkspabnx2DXf8Yz3D0rrx9m1T+M8Lh7kydYz5l6aR8KGs6vpo5yEqaxqYGaW9uJpYNZcJmb2Hqrn1+U/5dE8FX8vJ5MfzxkRVl99QmDS0F2/flsuDb23iqRU7WLL5AF86M4PHl22jwevjvkvHcMPkwSGtUjFtN7R3V5ITYllfVMFXcjJDco+8whIS4zxcGOWzOnfuv2wTMm+s388PXy4A4LdXT+Sy7P4uRxQ5mi8T/KuFmzlnSE8e+nIWg3t3dTs8E8DjEcZnhG4ZX59PeaewlAtHptElPrrHC1kyMUFVU+/l/tcL+ceavUwcmMpjUT52JJSalglet7eCKcN7R22X0I4uKyOVP763g7pGLwmxwf3CX19UQcmRWu4aOyqo13WDJRMTVPe+nM9r6/fzvWnDuH3GyKhuUAyHlC5xUV+90dFlZ6TQ4FU2FVcxITO4AwrzCkuJ9QjTO8AyyvaXboJmyeZSXl23n1suGsEPZo22RGI6hOzM0ExHr6q8U1jCpKG9SEmKC+q13WB/7SYojtQ28MOXNzCybzdunjbc7XCMCZp+KYn07pbA+iBPR7/twFF2HDzGrChbnrc1lkxMUPzy7c0cqKrloauybaoP06GICNkZKawPcslk4Qb/8rzR3iW4Scj+6kXkQRFZISLLRWRys2PXiMhqEVkpIg8E7J8vIstEZJWIzGnhmg8HbN8iIu8517giVK/DnNzq7eU89+EevnnBkKDXKRsTCbIzU9ledpSjdY1Bu2bexhImDkylb/fEoF3TTSFJJiIyHUhS1VxgHvAzEQmsFLweuATIBdJEJMtJCIdUdSowA/gvEfEEXPNW4GvO9nBgKjAFmA58R0TcXdO1k6qp93LPy/kM6pXEnRdHf48UY1qSlZGCKhQEqYtw0eFqNuw7ErXTzbckVCWT6cAzAKpaCSwHxgccnw/sBXYBlznbW4EnnedUA0WAwPHk1AtY6zz/IuBP6lcPPAdcGKLXYk5g/jtb2F1ezS+/lBX1/eSNaU1WwHT0wfBOYfQuz9uaUCWTXkBpwOMSoA+AiCQB9wLDgcHAtYBXVQtVtdw5Zx6wWlW9IjIU+BJwf1uub8Ln0z2H+d/3d3LNuQM5b5gVDE3H1bNrPAN7JgWtR1deYQkj+3ZjSAcapBqqZFIOBHaeTwcOONvjgKWqWqL+9TC3AXc3nSgi04ApqvqIUzX2GDABWApcICLPnOT6BFzrJhFZIyJrysrKgvfqDHWNXu5+KZ++3RO5d85ot8MxJuSyMlKC0qOr/GgdH+86xOwOVCqB0CWTJcANACKSgr9tpMA5th04R0SaBkzOA6qdc3PxV3vdBaCqDap6qaqe77SlvKeqN+JPLNc7z4kHrgaWNQ9CVZ9W1RxVzUlLs4FhwfT7pdv5rPQoP79yHMmJ0d9H3piTyc5IZV9FDQeP1rXrOos3leLTjtOLq0lIkomqLgZ8IrIceB24D8gRke86VVnPAktFZCVwAfAbEZkCvAGc5RxbJiJDWrn+VuAD5/nvAk+q6hfn9DYhsan4CI8v3cYVE/oHZUlaY6JBVkYK0P7Bi3mFpQxI7cLY/t2DEVbECNl0Kqp6Twu7VzvHXgJeanZsJZBykmteEbD9KPBoO8M0p6jR6+Pul/JJ6RLHf1821u1wjAmbcQNS8Ais31t52j+ijtY18t7Wg1w3aVCHW4bB5uYyp+SP7+0kv6iS310zsdMsbmUMQNeEWEb0SW5XyWTZlgPUe33MHtexqrjARsCbU7Cj7Ci/XvQZF4/py9zx0btWtTGnKysjhfVFlfj7Dp26hRtK
6NU1nrMG9QhyZO6zZGLaxOdT7nm5gPhYDz+7YlyHK6Ib0xbZmakcOlZP0eGaU35uXaOXZVvKuHhM3w65+JklE9Mmf/toDx/tPMR9c8d0mOkfjDlV2RlNMwifehfhVdvKOVrX2KEGKgayZGJOal9FDb98axMXDO/NV3Iy3A7HGNeMSk8mPsZzWiPh8wpL6JYQy+ThHXOAryUTc0Kqyo9eKcCn8IsvjbfqLdOpxcd6GNO/O+v3nloy8fqURRtLmTa6T9BXa4wUlkzMCb3y6T6WbSnjrtmjbPldY/CvvLhhXyVeX9sb4dfsOkT5sfoOs3ZJSyyZmFaVVdXx0wUbOWtQD/79vMFuh2NMRMjKSOVYvZftZUfb/Jy8wlLiYz1MHdVxpxC0ZGJa9ePXN1Bd5+VXXx7fIXufGHM6mpbxbWtVl6qSV1jCBcN70y2h4w7ts2RiWrRwQzFvFZRw24wRDO+T7HY4xkSMob27kpwQ2+YeXYX7j7CvoqZDV3GBJRPTgsrqBu57rZAx/bpzU+5Qt8MxJqJ4PMK4AW1fxjevsASPwIwzLJmYTuaBNzdy6Fg9D12VRVyMfUSMaS47M5VNxUeoa/Se9Ny8whLOHtyTXt0SwhCZe+ybwnzOis/KeHFtEd/OHcq4ASecd9OYTis7I4UGr7K5uOqE5+08eIzPSo922IGKgSyZmOOO1TVy78sFDEvryq3TR7gdjjERKyuzbcv45hWWADCzg7eXgCUTE+ChhZvZX1nDr76cRWJcxxxYZUww9E9JpHe3hJOuvJhXWMK4Ad3J6NHxx2hZMjEAfLzrEM+u3s1/nDeYnME93Q7HmIgmImRnpJxwOvqSylo+3VPR4ZbnbY0lE0Ntg5e7X8wno0cXfjBrlNvhGBMVsjJS2VZ2lKN1jS0eX7TRX8XVGdpLwJKJAR59dys7Dh7jF18aT9cOPKjKmGDKzkxBFQpaGW+SV1jK0N5dGd6nW5gjc4clk05uw75Knl6xg6/mZDBlRJrb4RgTNbKOT0f/xaquiup6PthRzsyx6Z1mclRLJp1Yg9fHD17Mp2fXeH50yRi3wzEmqvTsGk9mzy4tjoR/d9MBGn3aIZfnbY0lk07sqeXb2VR8hJ9dMY6UpDi3wzEm6mRlpLKuhTm68gpLSO+eSFYnGqtlyaST2lpaxWPvbmNuVr9O00BoTLBNyEhlX0UN5Ufrju+rqfeyYmsZM8f2xdOJJki1ZNIJeX3KXS/lk5QQw/2XjXU7HGOiVlaGv+QRWNW1/LMyaht8ne5HWsiSiYg8KCIrRGS5iExuduwaEVktIitF5IGA/fNFZJmIrBKROc6+eSLygXP+UyLicfbPFZFC5/wXQvU6OqJnV+3i0z0V/PiyMaQld+z5gowJpXEDUvAIn6vqeqewhJQucZwzpHON1wpJP1ARmQ4kqWquiKQAr4jILFVtcE65HrgEqACeEJEsYChwSFWnikgSsEhE8oB4YCpQB/wd6AOUAGcDV6tqfiheQ0e1p7ya/8nbwrRRaVwxYYDb4RgT1bomxDKiT/LxHl0NXh+LN5Vy8Zj0TjdJaqhe7XTgGQBVrQSWA+MDjs8H9gK7gMuc7a3Ak85zqoEiQFT1RaAfsA9IUNUS5xo5wMMi8raIWF1NG6gq976ST4xH+PmVtp67McGQlZFCflElqsoHO8o5UtvY4dcuaUmokkkvoDTgcQn+EgVOqeNeYDgwGLgW8KpqoaqWO+fMA1arqhdAVXcCA4HNInKdc80bVHUm8C3gkRC9jg7ln2v28v62cu6ZM5r+qV3cDseYDiErM5XyY/UUHa4hr7CELnEx5I7sfGO2QpVMyoHAdzMdOOBsjwOWqmqJqiqwDbi76UQRmQZMUdVHRCRBREYDqGoj8HvgPOfxQee/RcBhEflC31YRuUlE1ojImrKysqC/yGhSeqSWn725iXOH9OSacwa6HY4xHcaEjH/NIPxOYSkXjkzrlBOlhiqZLAFuAHDaTHKBAufYduAcEWlqr5kH
VDvn5uKv9rrLOdaAv02lacrNK4GPROS7TSUUEUkHUgPaY45T1adVNUdVc9LSOt8vhSaqyo9e2UB9o49ffTmrU3VXNCbURqUnEx/j4c+rdnOgqo5Z4zpfFReEqAFeVReLyAwRWe7suhfIEZGJqvq4iDwLLHXq7PcCN4rIFOANYF3Asa8DPwXyRKQRWA/cCSQAfxGRGwEvcHsoXkdH8UZ+MYs3lfLDS0YzuHdXt8MxpkOJj/VwRv/ufLTrELEe4aLRlkyCSlXvaWH3aufYS8BLzY6tBFoaLroTmNJsXw1wVXtjbIsX1xaxdvdhrp80iDH9u4fjlkF16Fg9979eSHZGCt84f4jb4RjTIU3ISGH93grOG9aLlC6dczYJmyL2JIoranj5kyL+/tEecgb14PrzBjFnXD/iY6Oj299P3iikqraBh66aRGwn66poTLj4J33c3ekGKgayb5eTuGX6CD784XT+a+4ZlB2t47bn1zH5l+/ycN4W9lXUuB3eCb27qZTX1u3ne9OGMyo92e1wjOmwLh7bl29eMITLJ/R3OxTXiL9DVceXk5Oja9asadc1fD5l5baD/GX1Lt7dfAABZpzRl+vPG8T5w3pHVMP2kdoGZv56BSld4njjlguipiRljIksIrJWVXNOdp5Vc50Cj0e4cGQaF45MY++hap77aA//+Hgv72z0L4Jz7aRBXHVWRkTUmf7irc0cqKrlqevPskRijAk5K5m0U22Dl7c3FPPn1bv5dE8FXeJiuGJif66bNIix/d2ZfnrV9oNc84cPuSl3KD+85AxXYjDGdAxtLZlYMgmiDfsq+cvq3by2fh+1DT7OGtSD6ycNYs74dBJiwzOIqbq+kdmPrMQj8PZtuXSJ73yDp4wxwWPJpJlwJJMmldUNvLB2L3/9YDe7yqvp1TWer52dybWTBjEgxNOY/GzBRp55byfP3zSJSUN7hfRexpiOz5JJM+FMJk18PuW9bQf58+rdLNnsn6ps+hl9uX7SIC4YHvwG+0/3HObLT6zi6nMG8vMrx5/8CcYYcxLWAB8BPB4hd2QauSPTKDpczXMf+hvsF20sZUjvrlx77kC+clZmUJbMrWv0cteL+fTtnsg9c0YHIXpjjGk7K5mEWV2jl7cLSvjz6l18sqeCxDgPl2cP4PrzBjGuHetF//qdLTy2ZBt/uuFspo3uE7yAjTGdmpVMIlRCbAxXTBzAFRMHsGFfJX/9YDevrtvHP9bs5cyBqVx/3iAuGd/vlBrsNxUf4fFl27ly4gBLJMYYV1jJJAJU1jTw4toi/vrBbnYePHa8wf6acweS0SPphM9t9Pq48vFVFFfWsOiOC+nRNT5MURtjOgMrmUSRlC5xfPOCIXx98mDe336Qv6zezZPLt/Pk8u1cNNo/wn5KKw32z7y3k4J9lfz+mjMtkRhjXGPJJIJ4PMKUEWlMGZHGvooanvtwN89/tJfFm0oZ3CuJ6yYN+lyD/Y6yo/xm0WfMHNOXS8Z33gnmjDHus2quCFfX6GXhhhL+sno3a3YfJjHOw7zs/lw/aTAPLNjI5pIjLL7zQvp0T3Q7VGNMB2TVXB1EQmwMl08YwOUTBlC4v5K/frCHVz/dxz/XFAHw0FVZlkiMMa6zkkkUqqxp4KW1RVTWNHD7jBE4q1IaY0zQWcmkA0vpEsc3LrBVE40xkcPmJjfGGNNulkyMMca0myUTY4wx7WbJxBhjTLtZMjHGGNNulkyMMca0myUTY4wx7WbJxBhjTLt1mhHwIlIG7D7Np/cGDgYxnGCxuE6NxXVqLK5TE6lxQftiG6SqaSc7qdMkk/YQkTVtmU4g3CyuU2NxnRqL69REalwQntismssYY0y7WTIxxhjTbpZM2uZptwNohcV1aiyuU2NxnZpIjQvCEJu1mRhjjGk3K5kYY4xpN0smLRCRy0Vkl4hMcB6PEZGlIrJMRB4XkbhIiMvZtyzg31yX4prv3H+ViMyJoPfrc3E5+1x9v0QkSUSed2J6X0QuiIT3
q6W4nP2uf74CYhwjIt8Qkf4i8paILBeR50QkORLicrafD3i/vulSPHNFpNCJ4YWwfb5U1f618A+4H5jgbL8FZDrb3wO+HSFxxQN/d/l9ugL4kbOdBLwfCe9XK3FFwvs1FJjsbI8BnoiQ96uluFx/vwLi6wF8AtwOPAuc6eyfC/wiEuJyHr8cAe/V/UBWwOOwfL6sZHISIhIPHFPVvc6up4EZLoYUKBs4y/nF8ROXYtgKPAmgqtXAASLj/WoeVxEwEZffL1XdoaqrRORhYA3wf0TA+9VCXP9LZHy+EJEY4FHgEWdXD1X9BEBV38Sf/FyPS0TSgLNFZImI/Nb57nBDDvCwiLwtItmE6fNlyeTkeuH/ggRAVRuInOWOtwBjVXUq4BGRL4c7AFUtVNVyABGZB3xEBLxfLcS1GtiEy+9XQHzfx/9l/UsCRia7/fkKiOshIuDz5fgp8Biwx3nsbXbcF95wjmse1zFglKpeBKwDbnEprhtUdSbwLeBhwvT3aMnk5MrxT0UAgFPf2PzD7ApVPeJ8OACex//L2xUiMg2YAswngt6vprhU9ZFIeL9E5IymX6yquhXYCYwMOO7K+9VCXNuBhAh4v+bgr7J8GH8J4GageRtJ2L/HWonrTKcUDC7+ParqQee/RUAVkN50LJSfL0smJ6Gq9UBXEenn7PoWsMjFkAAQEY+ILBaRBGfXV4G1LsWSC1wG3BVJ71dgXBH0fuUC/+7ElwpkAjUR8H41j2sQ8He33y9VfVtVm0pHtwO/A/aJyHgn1kuAjRES1z0i0tM5xZX3S0S+KyLXOdvpQDcgIRyfr0iprol09wB/ExEPUIj/w+MqVfWJyG+BZSLSALyvqq+EOw4RmQK8gb9Yv1REwF+8d/X9ah6Xs/sVXH6/gD8CfxCRa/FXz9wN1OL+56uluAbg/vvVkh8BT4tIErAP+E+X42nyIPC6iDQC24DvuhDDn4C/iMiN+EsgtwNKGD5fNmjRGGNMu1k1lzHGmHazZGKMMabdLJkYY4xpN0smxhhj2s2SiTHGmHazZGI6HRHp7kwbEqzrTRWRL3S3FJFzRORbrR13zrlfAibudIuI3CAiV7gdh4lelkxMp+OMhP9+GO7zkar+IdT3MSYS2KBF0+GIyFTgS/inKUnHP5DrCHAN0BO4Dv9Ms1eIyKvAemAqkADcqKobTnDt/wT+DYjBP9jwLufQABF52bnfDuA/8E8vMwH/wMmm518M/BD/D7lSAublauFe5wK/ByqBb6nqDhH5Af5pPBqAD/HPID1LRO4H6oFZQCpwG3ApcC5QA8xzYrvfiX0gIMB8VX2t2X2vAq7CPy1OCfBrVf1ERP4EDAfeVdX7W4vbdE6WTExHdQ4wDf8X/nLgOVW9WERG8fkJ+MYCj6rqj0VkOP4v2+tauqCIXIl/+pOLnBkIeuOff6kBuMjZXykiP3fu3djs+QOBW4HLVPWo8/yltL6k6gTgV8DrQIOIfA1/MrxAVVVEzsOZAsWRrqoXOrPX5gOzVfX7IvJt/IllDXCB8/wSEUkE/iwiWwNiPA/4L6DC2ZWBf0LDS/GPiL8Qf9I15nOsmst0VK+pao36p3jYif8LGeAzoH/AeTtUdSmAqm7DP5dRa6YBT6mqzzn/oKo2zXP0oqpWOttrm92jSQ7wgqoebXo+8OoJ7vcHIAX/GhSx+BPBE85rQlWbZkJu8ndnfxlQqKrrW3jNb6lqiXNeLfA3YFLANSYD96jq1KZ/wJXOsVuA+/AnOWM+x5KJ6ahqArYVqAZwvog9rZx3Mu8DNztzHCEiaU61VfPr+Gj5b2st8FUR6eo8vy/+6rjWdFfVZ/BXNf0bsBL4tjgToInI+cAZrbyWowHbga/5EhHp7zw/AX/V3wcB534I3Nk0waOIzAAecI6VqupP8JfejPkcq+Yypo1U9R/O7KsrRcSHvyroXgKm3D/J83eLyO+AN52EsA9YcoKnzHbaaBKBrznPHwiscCYTXIm/OutULAd+6lTp
xeBvM9koIuc4Mb4nIi/in7SzEX/7z3dEpBvwjHP/N07xnqYTsIkejekkRGQw/uVlXZ/12nQ8VjIxphkRuQG4odnu/1PV/wvhPZc13+e0VxgTFaxkYowxpt2sAd4YY0y7WTIxxhjTbpZMjDHGtJslE2OMMe1mycQYY0y7WTIxxhjTbv8fEjEOv2vpYWoAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# plot CV误差曲线\n",
    "test_means = grid_search.cv_results_[ 'mean_test_score' ]\n",
    "test_stds = grid_search.cv_results_[ 'std_test_score' ]\n",
    "train_means = grid_search.cv_results_[ 'mean_train_score' ]\n",
    "train_stds = grid_search.cv_results_[ 'std_train_score' ]\n",
    "\n",
    "x_axis = min_child_samples_s\n",
    "\n",
    "plt.plot(x_axis, test_means)\n",
    "plt.xlabel( 'min_child_samples' )\n",
    "plt.ylabel( 'AUC' )\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （5）调参sub_samples\n",
    "\n",
    "    此时，eta=0.1、n_estimators=42、max_depth=6、num_leaves=70、min_child_samples=40。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 8 candidates, totalling 40 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.\n",
      "[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:    8.4s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8263499358144833\n",
      "{'subsample': 0.2}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Done  40 out of  40 | elapsed:   27.2s finished\n"
     ]
    }
   ],
   "source": [
    "# 1、设置参数范围\n",
    "params = {'boosting_type': 'goss',\n",
    "          'objective': 'binary',\n",
    "          'is_unbalance':True,\n",
    "          'categorical_feature': [0,1,3,5,6,12,15,16,17,18,19,20],\n",
    "          'n_jobs': 4,\n",
    "          'learning_rate': 0.1,\n",
    "          'n_estimators':n_estimators_1,\n",
    "          'num_leaves': 70,\n",
    "          'max_depth': 6,\n",
    "          'min_child_samples':40,\n",
    "          #'subsample': 0.7,\n",
    "          #'bagging_freq': 1,\n",
    "          'colsample_bytree': 0.7,\n",
    "          #'verbosity':5\n",
    "         }\n",
    "\n",
    "subsample_s = [i/10.0 for i in range(2,10)]\n",
    "tuned_parameters = dict( subsample = subsample_s)\n",
    "\n",
    "\n",
    "# 2、交叉验证找最优参数\n",
    "lg = LGBMClassifier(silent=False,  **params)\n",
    "grid_search = GridSearchCV(lg, n_jobs=4, param_grid=tuned_parameters, cv = kfold, scoring=\"roc_auc\", verbose=5, refit = False)\n",
    "grid_search.fit(X_train , y_train)\n",
    "\n",
    "\n",
    "# 3、打印结果\n",
    "print(grid_search.best_score_)\n",
    "print(grid_search.best_params_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**画图**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/apple/anaconda3/lib/python3.7/site-packages/sklearn/utils/deprecation.py:125: FutureWarning: You are accessing a training score ('mean_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n",
      "/Users/apple/anaconda3/lib/python3.7/site-packages/sklearn/utils/deprecation.py:125: FutureWarning: You are accessing a training score ('std_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEICAYAAABF82P+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAEa5JREFUeJzt3X+sZHV9xvH3A4viKr+5lBrFldCmIqDoiEploS5Fi7Jim7QJSAqGbKxoS2wVt0pEQSptEaoULJJWrNXUVqFStVVg2YWy/rhraSmapktFaXRhhUAbVlHg0z/mXB0u353ZhZ07917er2TDnHO+Z+ZhMneeOefc+d5UFZIkzbbTpANIkuYnC0KS1GRBSJKaLAhJUpMFIUlqsiAkSU0WhCSpyYKQJDVZEJKkpiWTDvBE7LvvvrVs2bJJx5CkBWXDhg0/qKqpUeMWdEEsW7aM6enpSceQpAUlyXe2ZZynmCRJTRaEJKnJgpAkNVkQkqQmC0KS1GRBSJKaLAhJUpMFIUlqsiAkSU0WhCSpyYKQJDVZEJKkJgtCktRkQUiSmiwISVKTBSFJarIgJElNFoQkqcmCkCQ1WRCSpCYLQpLUZEFIkprmZUEk+cske046hyQ9mY2tIJKcn2RdkrVJjpy17aQk65PcmOTcWdtOBDZW1X3jyiZJGm0sBZFkBbC0qpYDK4HzkuwyMOQU4HhgOTCV5LBuvz2AU4ELxpFLkrTtxnUEsQK4AqCq7gfWAocObL8QuBO4Azihuw3wR8CBwJeSPG9M2SRJ22DJmO53H+CugeVNwH4ASZYCq4GDujFHAw8neTbwTKDX/fdy4Lgx5ZMkjTCuI4h7gKmB5f2Bu7vbhwBrqmpTVRWwETgLOAL4dFX9uKruAO6bdVoKgCSrkkwnmd68efOY4kuSxlUQ19O/ljBzXWE5cGu37XbgiCQzRy8rgS3d+uMG9tmrqn4y+46r6vKq6lVVb2pqavZmSdIOMpZTTFV1bZJjk6ztVq0GekkOr6pLk1wJrEkC/esPp1fVliT/leRm4EHgHePIJknaNumf5VmYer1eTU9PTzqGJC0oSTZUVW/UuHn5RTlJ0uRZEJKkJgtCktRkQUiSmiwISVKTBSFJarIgJElNFoQkqcmCkCQ1WRCSpCYLQpLUZEFIkposCElSkwUhSWqyICRJTRaEJKnJgpAkNVkQkqQmC0KS1GRBSJKaLAhJUpMFIUlqsiAkSU0WhCSpyYKQJDVZEJKkJgtCktRkQUiSmiwISVKTBSFJarIgJElNFoQkqcmCkCQ1WRCSpCYLQpLUZEFIkposCElSkwUhSWqyICRJTRaEJKlpbAWR5Pwk65KsTXLkrG0nJVmf5MYk5zb2PTjJG8eVTZI02lgKIskKYGlVLQdWAucl2WVgyCnA8cByYCrJYQP77gV8Ath9HNkkSdtmXEcQK4ArAKrqfmAtcOjA9guBO4E7gBO62yTZGfgz4OIx5ZIkbaMlY7rffYC7BpY3AfsBJFkKrAYO6sYcDTzcjXsf8CHgGcDerTtOsgpYBXDAAQeMIbokCcZXEPcAU8Dmbnl/4Ovd7UOANVW1CSDJRuCsJDcBJwK/DOwJPCPJdFXdNHjHVXU5cDlAr9erMeWXpCe9cZ1iuh44FSDJHvSvNdzabbsdOCLJTDmtBLZU1Rer6vlVdQxwJnDJ7HKQJM2dsRxBVNW1SY5NsrZbtRroJTm8qi5NciWwJgn0rz+cPo4ckqTHL1UL9yxNr9er6enpSceQpAUlyYaq6o0a5xflJElNFoQkqcmCkCQ1WRCSpCYLQpLUZEFIkposCElSkwUhSWqyICRJTRaEJKnJgpAkNVkQkqQmC0KS1GRBSJKaLAhJUpMFIUlqsiAkSU0WhCSpyYKQJDUNLYgk+81anhpvHEnSfLHVgkjyWuDds1a/LcnrxhtJkjQfDDuC+D1g9ax15wK/O744kqT5YlhB
7FRVDwyuqKotQMYbSZI0HwwriKdvZX2NI4gkaX4ZVhDfSrJ8cEWSlwHfHm8kSdJ8sGTIttXA1UkuBG4GesBZgBepJelJYKtHEFW1CXgV8CzgbOAg4DVVtXmOskmSJmjYEQRVdT9w0RxlmTPvveY2vvm9/510DEl6XA5+5u6854Tnj/1xtloQSa7isRek9wBWV9XXxppKkjRxWy2Iqnr97HVJ9gc+Cxw5zlDjNhfNK0kL3XbNxdRdl/jJmLJIkuaR7SqIJEcC940piyRpHtnWaxDpbm8C3jIHuSRJE7bN1yCSPA1YBfwVcOyYc0mSJmzkKaYkuyX5Q+A24AH6342QJC1yw6b73jfJ+4GrgTuAjVV1RVU9PFfhJEmTM+wI4hzg9cDZVfVJ4KE5SSRJmheGTbXxFuDVwBuSXA/st7WxkqTFZ+g1iKr6blW9GTgZWJfkpu4vzUmSFrlt+h5EVX2/qt5G/5TTy8cbSZI0H2zvN6k3V9W7tmVskvOTrEuytvuC3eC2k5KsT3JjknMH1l+Y5IYkNyf5te3JJknasYbO5vp4JVkBLK2q5Un2AK5K8qqqmpmm4xTgePrfyr4syWHAgcC9VXVMkqXAl5P8c1U9Mo6MkqThxlIQwArgCuhPGZ5kLXAo8I1u+4XAncA9XYbVwMPAjd0+W5L8D/79a0mamHEVxD7AXQPLm+h+C6o7OlhN/w8Q3QUcDTxcVbfNDE6yEljf+s5FklX0v9HNAQccMKb4kqTtugaxHe4BpgaW9wfu7m4fAqypqk1VVcBG+n/KFIAkvwIcVVUXt+64qi6vql5V9aamplpDJEk7wLgK4nrgVIDuGsRy4NZu2+3AEUlmjl5WAlu6scuBE4B3jCmXJGkbjeUUU1Vdm+TY7toD9E8p9ZIcXlWXJrkSWJME+tciTk9yFHANcMvAttOq6tvjyChJGi79szwLU6/Xq+np6UnHkKQFJcmGquqNGjeuU0ySpAXOgpAkNVkQkqQmC0KS1GRBSJKaLAhJUpMFIUlqsiAkSU0WhCSpyYKQJDVZEJKkJgtCktRkQUiSmiwISVKTBSFJarIgJElNFoQkqcmCkCQ1WRCSpCYLQpLUZEFIkposCElSkwUhSWqyICRJTRaEJKnJgpAkNVkQkqQmC0KS1GRBSJKaLAhJUpMFIUlqsiAkSU0WhCSpyYKQJDVZEJKkJgtCktRkQUiSmiwISVKTBSFJahpbQSQ5P8m6JGuTHDlr20lJ1ie5Mcm527KPJGluLRnHnSZZASytquVJ9gCuSvKqqvpJN+QU4HjgPuCyJIcBUyP2kSTNoXEdQawArgCoqvuBtcChA9svBO4E7gBO6G6P2keSNIfGcgQB7APcNbC8CdgPIMlSYDVwUDfmaODhYftIkubeuI4g7qF/ymjG/sDd3e1DgDVVtamqCtgInDVin59KsirJdJLpzZs3jyW8JGl8BXE9cCpAdz1hOXBrt+124IgkM0cvK4EtI/b5qaq6vKp6VdWbmpqavVmStIOM5RRTVV2b5Ngka7tVq4FeksOr6tIkVwJrkkD/+sPpVbVl9j5eoJakyUn/LM/C1Ov1anp6etIxJGlBSbKhqnqjxvlFOUlSkwUhSWqyICRJTRaEJKnJgpAkNVkQkqQmC0KS1GRBSJKaLAhJUpMFIUlqsiAkSU0WhCSpyYKQJDVZEJKkJgtCktRkQUiSmiwISVKTBSFJarIgJElNFoQkqcmCkCQ1WRCSpCYLQpLUZEFIkposCElSkwUhSWqyICRJTRaEJKnJgpAkNVkQkqQmC0KS1GRBSJKaLAhJUpMFIUlqsiAkSU0WhCSpyYKQJDVZEJKkJgtCktRkQUiSmiwISVKTBSFJakpVTTrD45ZkM/Cdx7n7vsAPdmCccVtIec06Pgsp70LKCgsr7xPN+pyqmho1aEEXxBORZLqqepPOsa0WUl6zjs9CyruQssLCyjtXWT3FJElqsiAkSU1P5oK4fNIBttNCymvW8VlIeRdSVlhY
eeck65P2GoQkabgn8xGEtKAleW2SN046hxavRV8QSc5Psi7J2iRHztr2q936G5N8OEkmlXMg07C8q5J8NclXkpw7qYwDebaadWDMzkkumOtsLaPyJvlWkhu6fy+eRMaBLKOy7gacAfzN3Kdb2Eb8jJ2UZH33njDxnzEYmffobv36JGfs8AevqkX7D1gBXNzd3gO4HtilW94ZuAHYtVt+L3DifM3brTuJfqkvAb4M7Dxfsw6M+2Ng/Xx+LXTrlgEfnHTObX1ugYuBF08660Ce84F1wFrgyFnbPtv9rN0AfBc4bb4+t8AXgb2AAB8BDpuvr4Uu43XA0u594bPAgTvy8Rf7EcQK4AqAqrqf/ov30G7bLsDbq+pH3fJtwFPnPOGjDctLVX0SeDmwCbitqh6eRMjO0KwASX4b2AjcNefpHmtU3pcAK7pPY2+eQL5BQ7MmeSlwMnBRkt+ZSMIBSVYAS6tqObASOC/JLjPbq+rXq+oY+v9f/wp8fCJB+0a9Di4E7gTuAE7obk/SsLz7ArdX1ZaqeoR+WSzbkQ++2AtiHx795rQJ2A+gqn5UVV8HSLI3cCJw1ZwnfLSt5p1RVf8CPBvYK8lRc5httqFZuzexA6tqvvxmyKjn9jrgcOCVwEuSTPILU6Oyvh14BfArwPIkL5jDbC0jPyx0TgU+OeEPNlt9bpMsBVYDB9F/oz0ZmGRWGP5a+AFwcJL9kjyNfjnfsiMffLEXxD3A4NfJ9wfuHhyQZHfgw8DvV9WP5zBby1bzJtkzyXMAquqHwEeBl815wp8ZlnUP4CLgmCQ3AK9Ict6cJ3y0oa+Fqrq3qh7p3rz+HnjhHOcbNOp1u1NV/WeX9RPAEXMZrmHkB5skTwVeB3x6DnO1DHtuDwHWVNWm6p/D2QicNcf5Zttq3i7jO+g/pxuAW6rq3h354Iu9IK6n/6ll5k1rOXDrzMbuQt+lwDur6vuTCDjL0LzAR5Ls3N1+PfC1OU33aFvNWlX3V9WRVXV0d2rhpqp696SCdraaN8neST6XDvAb9E+FTMqo18HuSWbegI8D/mNO0z3WyA9iwJuAj3ZvapM07Lm9HTgiyZJueSWwZa4DzjL0tVBVN9N/DdwOvG9HP/iiLoiquhZ4JMla4HPA2UAvyZuTPIP+BakXAX/d/ebKRM/nDstbVfcBHwPWdds3V9Xa+Zh1UpmGGfHc3gt8HlgP3Ej/k9iG+Zi1G/JO4DNJ1gF3VdX6CUWdMeqD2NOBV1bVNRNJN2DE6+Ae4EpgTZIb6Z/Gu2hyabf55+w9wCVV9cCOfny/KCfpCUvyAfq/QAH98/gFHF5VlyZ5F/CVqrpuYgEXqST7A++rqlVjuX8LQtI4JXl+Vd026RzafhaEJKlpUV+DkCQ9fhaEJKnJgpAkNVkQetJLcmqSEyedoyXJ1ZPOoCcvC0KS1GRBaFFL8tIk00muS/LxJC8c2HZxkmXd4qFJPt99YfLzSX5hyH2e102vfFm3/HNJ/rbb9/okb01ycbftH5JclP4U7V9IcmiSq5PckuTMbsw5Sc5Mcm13H59J8qxZj7lbknOTXJPkS0kuSLJrkucm+Vo3yeCkp9zQImNBaLF7IXABcDz9GTq35nBgZtbRM4DLRow9Bnhbt3wJ8CfdvscCzx0YezDwqap6GfAF4E+B3+zu4zUD4w4Ejuvu4+zG458HHAXsBjwFOBp4A/CLwN/Rn7jv34ZklrbbktFDpAXto8Ab6b/pP8SjPxQ9beD2p6rqQYCquiPJd5PsvZXJz1YBfwB8g/50LU+pqulu30eSfKx7TID/rqqZObNuA3afmRQyyfe6ySIBPtZN2UxVfTPJgwNzAgEc3JUH3b5Lquqh7vae9KffuAR4cFufGGkUjyC02O1eVVfQn2EU4AUASX6e/lHFjJOT7NptWwY8e8jMmA9U1fuBM7o38QeTvKjbdyd+Vg4APxy4XTx68rfiZz+Dp81MxJjkl+iXzkMDY+9I9+dFu8f4i+501e70jyBu
AM4c9kRI28sjCC12r07yJmBX4LeAP09yGv3TTV8eGPdV+hPgPR34P/qzjz5G9+b8we5N/N+r6qEkbwU+1M2w+hD96cKft505vwX8Y/rz+t8LzJ708O3Ah9P/I0xPAS6tqluTHA2cA+xO/8hG2mGcakOasCTnAFdX1Q79Yy/SE+URhLQVSV5N/9z+oH+qqg9MIo801zyCkCQ1eZFaktRkQUiSmiwISVKTBSFJarIgJElNFoQkqen/ARzPp2JWCNxgAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# plot CV误差曲线\n",
    "test_means = grid_search.cv_results_[ 'mean_test_score' ]\n",
    "test_stds = grid_search.cv_results_[ 'std_test_score' ]\n",
    "train_means = grid_search.cv_results_[ 'mean_train_score' ]\n",
    "train_stds = grid_search.cv_results_[ 'std_train_score' ]\n",
    "\n",
    "x_axis = subsample_s\n",
    "\n",
    "plt.plot(x_axis, test_means)\n",
    "plt.xlabel( 'sub_samples' )\n",
    "plt.ylabel( 'AUC' )\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "    这怎么回事？？那就还是默认数值0.7吧"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （6）调参colsample_bytree\n",
    "\n",
    "    此时，eta=0.1、n_estimators=42、max_depth=6、num_leaves=70、min_child_samples=40、sub_samples=0.7。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Fitting 5 folds for each of 8 candidates, totalling 40 fits\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.\n",
      "[Parallel(n_jobs=4)]: Done  10 tasks      | elapsed:    8.1s\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8331357485920589\n",
      "{'colsample_bytree': 0.4}\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[Parallel(n_jobs=4)]: Done  40 out of  40 | elapsed:   26.9s finished\n"
     ]
    }
   ],
   "source": [
    "# 1、设置参数范围\n",
    "params = {'boosting_type': 'goss',\n",
    "          'objective': 'binary',\n",
    "          'is_unbalance':True,\n",
    "          'categorical_feature': [0,1,3,5,6,12,15,16,17,18,19,20],\n",
    "          'n_jobs': 4,\n",
    "          'learning_rate': 0.1,\n",
    "          'n_estimators':n_estimators_1,\n",
    "          'num_leaves': 70,\n",
    "          'max_depth': 6,\n",
    "          'min_child_samples':40,\n",
    "          'subsample': 0.7,\n",
    "          #'bagging_freq': 1,\n",
    "          #'colsample_bytree': 0.7,\n",
    "          #'verbosity':5\n",
    "         }\n",
    "\n",
    "colsample_bytree_s = [i/10.0 for i in range(2,10)]\n",
    "tuned_parameters = dict( colsample_bytree = colsample_bytree_s)\n",
    "\n",
    "\n",
    "# 2、交叉验证找最优参数\n",
    "lg = LGBMClassifier(silent=False,  **params)\n",
    "grid_search = GridSearchCV(lg, n_jobs=4, param_grid=tuned_parameters, cv = kfold, scoring=\"roc_auc\", verbose=5, refit = False)\n",
    "grid_search.fit(X_train , y_train)\n",
    "\n",
    "\n",
    "# 3、打印结果\n",
    "print(grid_search.best_score_)\n",
    "print(grid_search.best_params_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**画图**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/apple/anaconda3/lib/python3.7/site-packages/sklearn/utils/deprecation.py:125: FutureWarning: You are accessing a training score ('mean_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n",
      "/Users/apple/anaconda3/lib/python3.7/site-packages/sklearn/utils/deprecation.py:125: FutureWarning: You are accessing a training score ('std_train_score'), which will not be available by default any more in 0.21. If you need training scores, please set return_train_score=True\n",
      "  warnings.warn(*warn_args, **warn_kwargs)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAY0AAAEICAYAAACj2qi6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xl8FPX9x/HXJzeBHBzhlENuuQMRlSNA0aIoeGPVqnhbbStqPbD1V1uPHop4X8UTbb2ttt4oBBSqQJZbQFDYgBwBsiEQcn9+f+wEY4RkQ7KZ7O7n+XjkYXZmduZN3MwnM9/5fr+iqhhjjDGBiHI7gDHGmNBhRcMYY0zArGgYY4wJmBUNY4wxAbOiYYwxJmBWNIwxxgTMioYxxpiAWdEwxhgTMCsaxhhjAhbjdoCG1qZNG+3WrZvbMYwxJqQsXbp0l6qm1bZd2BWNbt26sWTJErdjGGNMSBGRzYFsZ7enjDHGBMyKhjHGmIBZ0TDGGBMwKxrGGGMCZkXDGGNMwKxoGGOMCZgVDWOMMQGzomGCrrisnNmLNrGvuMztKMaYerKiYYJu7tqd3PHOaq56cQlFpeVuxzHG1IMVDRN02V4fUQILN+7mt//yUFZe4XYkY8wRsqJhgs7jzWNw51T+OKkfH6/ZwfS3VqKqbscyxhyBsBt7yjQtpeUVrNiSzy+P78qlI4/GV1jKQ59+Q0qzWH5/6jGIiNsRjTF1YEXDBNXabQUUl1WQ3iUVgGkn9sJXWMKsz7+jZfM4rhvX0+WExpi6sKJhgirbmwdAepeWAIgIf5zUn/wDpdz30TpSmsXyy+O7uhnRGFMHVjRMUHm8ebRLjqdjSsLBZVFRwn3nDqagqIw73llFcrNYJg/u6GJKY0ygrCHcBJUnx0d655Y/abuIjY7isQuHcmy3Vtz46jLmrdvpUkJjTF1Y0TBBs2tfMZt3Fx5sz6guITaaWZdk0Kd9Ete8tJQlm/Y0ckJjTF1Z0TBBs8zrA2Bo15aH3SY5IZYXLhtOx5RmXPr8YtZ8v7ex4hljjoAVDRM0npw8YqKEAR1TatyuTYt4Xrx8OC3iY7j42a/YtGt/IyU0xtSVFQ0TNNmbfRzTIZlmcdG1bntUy0RmXz6c8ooKfvnMl2zPL2qEhMaYurKiYYKivEJZvsXH0MO0ZxxKz7ZJvHDZcPL2l3Dxs1/iKywJYkJjzJGwomGCYv2OAgpLyg/2zwjUoKNS+cclGWzaXcjU5xaz30bGNaZJsaJhguKHTn2BX2lUGtGjDY+en87KrflcPXspxWU2Mq4xTYUVDRMUHq+P1s3j6NIq8Yje//P+7fnb2YP4fMMupr2yjPIKG+DQmKbAioYJCo83j/QuqfUakPCcYUdxx2n9+GDVdm63kXGNaRJsGBHT4HyFJWzM3c9ZQ4+q974uH3U0vsISHvlsA6nNY5l+yjENkNAYc6SsaJgGtyzH36kvvXPd2zMO5caTeuMrLOWprG9JbRbHr8b2aJD9GmPqzoqGaXAeZ6a+QQ1UNESEP032j4z7tw/XkpoYy/nDuzTIvo0xdWNFwzS4bG8evdsl0SK+4T5eUVHCjCmDKSgq5fa3V5KcEMupgzo02P6NMYGxhnDToCoqlGU5vjr3zwhEbHQUj184jIyuLZn2qof563Mb/BjGmJoFrWiIyL0iMl9EskRkRLV1F4jIIhFZICJ3OcsSReQVEVkoIl+IyChneT8RmSsi80TkcRGJDVZmU3/f7tpHQVFZnXqC10WzuGhmXXIsPdsmcfXspSzdnBeU4xhjDi0oRUNExgOJqpoJTAburnayvwiYCGQCaSIyCGgPPKyqI4ArgQudbe8HLlbVscBq4LJgZDYNI9sZ2TYYVxqVUprF8uJlw2mXHM+lz33F2u02Mq4xjSVYVxrjgVkAqpoPZAEDq6yfAeQAm4BJQI6qfquqC0XkfmAJ8KyIxAH7VTXHed/TwIlB
ymwagMebR3JCDN3bNA/qcdKS4pl9+XE0i4vmome+wru7MKjHM8b4BatotAZ2VHm9HWgL/ttQwHSgJ9AN/xXFwXEiVPV3wGDg70AasLPKulIO0XgvIleJyBIRWZKba/e53eTx+tszoqKOvFNfoDq3SuSly4+jtNw/Mu7OvTYyrjHBFqyisRv/Cb9Se344+Q8A5qrqdvV38d0A3CoixzhXFqjqN8BGoAxoU7kT5xbXTwYiUtWnVTVDVTPS0tKqrzaNZF9xGet2FBzReFNHqle7JJ6/dDi79hVz0TNf2ci4xgRZsIrGZ8BUABFJwd92sdJZtxEYLiKVVwyTgUJnm4ud96QCXVV1B9BcRCqfrbwS+CRImU09Lc/xoRrc9oxDGdI5lX9cnMF3u/Zz2fOLKSyxkXGNCZagFA1VnQNUiEgW8C5wB5AhIteq6m7gBWCuiCwARgEzgWeAkSIyF3gT/y0sgNuAl0VkHtAfeDYYmU39eZyRbYc0UKe+uhjZsw0Pnz+EZTk+GxnXmCCScBsELiMjQ5csWeJ2jIh0+fOL2bynkDk3jnEtw2uLc7jlzRWcOrADD5+fTnQjtK0YEw5EZKmqZtS2nfUINw1CVfHk+Bjft62rOaYc25n8A6Xc8/7XJDeL5d4zB9RrpF1jzI9Z0TANYvPuQvbsL2n09oxDuTKzO3mFJTw+byOpibHcenJftyMZEzasaJgG4cnxt2cM7dr47RmHcvOEPvgOlPLEvI2kNovl6jE2Mq4xDcGKhmkQ2Zt9NI+LplfbJLejAP6Rce86fQB7D5Tylw/8I+Oed6yNjGtMfVnRMA3Ck5PH4M6pTarhOTpKeGDKEAqKypj+ln9k3FMG2si4xtSHjXJr6u1ASTlfbytgaBNoz6guLiaKJ345lPQuLbn+lWV8/s0utyMZE9KsaJh6W7HFR3mFNmpP8LpIjIvh2UuOpXtac66aveRgfxJjTN1Z0TD15nGmd3WjU1+gUhL9I+O2aRHP1OcWs35HgduRjAlJVjRMvXm8eXRrnUjrFvFuR6lR2+QEXrr8OOJjorjomS/J2WMj4xpTV1Y0TL2oKtne4MzUFwxdWicy+/LjKCp1RsYtsJFxjakLKxqmXrb6DpBbUNxk2zMOpU/7JJ679Fh27i3m4me+Iv9AqduRjAkZVjRMvXicmfqa4pNTNRnapSVPXzyMjbn7uPz5xRwosQEOjQmEFQ1TLx6vj4TYKPq0bxqd+upidK80HvpFOtnePH718lJKyircjmRMk2dFw9RLtjePQZ1SiY0OzY/SxIEduOfMgcxbl8tNry+nvCK8Rn02pqFZj3BzxIrLylnz/V4uHdnN7Sj1cv7wLuQfKOWvH6wlpVkMd51uI+MaczhWNMwRW/39XkrKK0LmyamaXDOmB3mFJTyV9S2pzeL43YQ+bkcypkmyomGOWPZmf8/qUHpyqia3ndyX/MJSHp27gdjoKH7zs55ENaGxtIxpCqxomCPmyfHRKbUZ7ZIT3I7SIESEe84cSFFpOTPnrGfRt7uYMWUInVKbuR3NmCYjNFsvTZOwzOsLm6uMStFRwszzhvD3cwaxcks+J8+cz9ueLYTbtMjGHCkrGuaI7NhbxFbfgbBoz6hORJiS0ZkPrs+kT/skbnh1Ob/+p4e8/SVuRzPGdVY0zBGpHCk23K40qurSOpFXrz6BW07uw8drtjPhwflkrc91O5YxrrKiYY6Ix+sjLjqK/h2T3Y4SVNFRwrVje/L2tSNJaRbLJc9+xf+9s8p6kJuIZUXDHJFsbx79OyUTHxPtdpRGMaBTCv/5zSguH3U0Ly7azKkPL2C5MyS8MZHEioaps9LyClZsySe9c/i1Z9QkITaaO07rx8tXHMeB0nLOfmIhD3/6DWXlNvxIICoqlA9WbrNiG+KsaJg6W7utgOKyCoZ2Dd/2jJqM7NmGD6dlctqgDjzwyXrOeXIR3+3a73asJktV+WztDiY+vIBfvZzNJc99xY69NiR9qLKiYeos+2Aj
eGRdaVSV0iyWB3+RziPnp/Nt7j4mPrSAl7/cbI/mVrN40x6mPLWIy55fwoHScv44qR9FpeX87vXlVNg4XyHJOveZOvN482ibFE/HlPDo1FcfkwZ35Nhurfjd68v5/durmLNmB387ZxBtkyL7Z7N2+17u+3Adn67dSVpSPHefMYDzju1MbHQUMdFR3PHvVcz+32YuGdHN7aimjuxKw9SZJ8fH0C4tbVA/R/uUBF68bDh3TurHwo27mTBzPh+u2u52LFfk7CnkhleXccpDC/hq0x5uObkPWTeP5ZfHdz04EvIvj+vC2D5p3Pv+12zYaXO1hxorGqZOdu8rZvPuwrDun3EkoqKEqSOP5r+/GUWnls245qWl3Pz6cgqKImNWwNyCYv74zip+NmMe76/cxtWZPVhwyziuHduTxLgf39AQEf5+9iAS46KZ9uoym8ckxFjRMHVSOVNfJLdn1KRXuyTe+tVIfj2uJ29mb/H/xf3dHrdjBc3eolJmfLyOMffN5aUvvZyb0Zmsm8dx2yl9SU2MO+z72iYn8JezBrJq614e/vSbRkxs6svaNEydeHLyiIkSBnZKcTtKkxUXE8XvJvRhXN80bnh1Oec9vYirM3tww0m9wqZfS1FpObMXbeaxeRvwFZZy2qAO3HhSb7qntQh4HycP6MA5w47i8XkbGNc3jWFdWwUxsWkoQbvSEJF7RWS+iGSJyIhq6y4QkUUiskBE7qqyfIaIzBORhSJySpXl86p8nRqszKZ2Hq+PYzok0ywuPE5+wTSsays+uH40vzi2M09mbeSMxxaybnto38MvK6/gtcU5jLt/Hve8/zWDjkrlv78ZxaMXDK1Twaj0x0n96JjajBteXc6+4rIgJDYNLShFQ0TGA4mqmglMBu4Wkdgqm1wETAQygTQRGSQiZwB7VHUscCLwBxGJEpE4YJuqjnW+3gtGZlO78gpleU74jWwbTM3jY/jLWYOYdXEGuQVFTHr0c2Yt+DbkHjdVVT5ctY0JD87nljdX0C45gX9eeRwvXjacAfW46kxKiOWBKUPIySvk7v+uacDEJliCdaUxHpgFoKr5QBYwsMr6GUAOsAmY5Hz/DfCk855CYAsgwGBgmHOV8acg5TUBWL+jgP0l5VY0jsCJ/drx4bRMMnulcfd7X3PhrC/Z6jvgdqyALNywizMe+4JrXspGRHjqomG8fe0IRvRo0yD7H350K64Z04NXFufwyZodDbJPEzzBKhqtgar/97cDbQFEJBGYDvQEugEXAuWqulpVdzvbTAYWqWo5sA7o71yBRInI2UHKbGpR2Qg+1BrBj0ibFvH84+Jh/O3sgSzf4uPkB+fzb8/WJtshcOWWfC565ksumPUluQXF3HfOID6alsmE/u0b/HHrG07sTb8Oydz25gpyC4obdN+mYQWraOwG0qq8bg/sdL4fAMxV1e3q/23ZANxauaGIjANGq+qDAKq6V1Urn1t8BUivfjARuUpElojIktxcG7o6WLK9ebRqHkeXVoluRwlZIsJ5x3bhg+tH07tdEtNeXcav/+XBV9h05urYmLuP617OZtKjn7Nqaz5/OPUYPvvdWM7N6Ex0kKa/jYuJ4sFfDKGguIzb3lzRZAupCV7R+AyYCiAiKfjbLlY66zYCw0Wk8smtyUChs20m/ttVtzivo0RkjojEO9tOAZZWP5iqPq2qGaqakZaWVn21aSAebx7pnVOtU18D6Nq6Oa9dfQI3T+jDR6v8c3XMd3mujm35B5j+1gp+PnM+c9ft5LfjezH/lnFcMbo7CbHBf/Chd7skbj25L5+u3ckri3OCfjxzZILyyK2qzhGRE0Uky1k0HcgQkXRVfVxEXgDmOiefHOAKERkN/AdYVmXdpcAjwDwRKQW+UNW3g5HZ1Cy/sJSNufs5a+hRbkcJG9FRwnXjejKmdxrTXl3Gxc9+xSUndOW2U45p1KfTfIUlPDFvI88v3ESFKhcd35Vf/6wnbVrE1/7mBnbpiG58tnYHd/13DSd0
b023Ns0bPYOpmYTbZWBGRoYuWbLE7RhhZ966nUx9bjH/vOI4RvRsmAZQ84Oi0nL+9uFanvtiEz3SmjPzvCEMOiq4DxwUlpTx3BebeDJrI/uKyzgzvRM3nNibzi7fftyWf4AJM+fTo20LXr/6BGKirQ9yYxCRpaqaUdt29n/DBMTj9RElMKizPTkVDAmx0fxxUn9euvw49heXc9bjC3kkSHN1lJRVMHvRJjL/Po/7PlrHcUe35sPrM3lgyhDXCwZAh5Rm3H3mQDxeH4/P2+h2HFON9Qg3AfHk+OjdLokW8faRCaZRvdrw0bRM7nhnFTM+Wc9n63Yyc8qQBrlNU1Gh/GfF98z4eD3ePYUM79aKpy4a2iR7Yk8e3JE5a3bw0KffMKZ3GoPtj5Umw640TK0qKtTfCG6P2jaKlMRYHj4/nYd+MYSNO/cx8eEF/Osr7xE/UaSqzF27k1Mf+ZzrX1lG8/gYnrv0WF69+vgmWTAq3XX6ANomxXPDa8tsTvYmxIqGqdW3u/ZRUFRmnfoa2elDOvHhtEzSu6Qy/a2VXPHCkjr3YVi6eQ/nPf0/Ln1+MfuLy3joF0N47zejGNenbZN/Ci4lMZb7zx3Mt7n7+csHX7sdxzisaJhaZR/s1GdFo7F1TG3G7MuO4/9O68eCDbuY8OB8Plpd+1wd67YXcMULSzj7Cf9UtHedMYA5N47h9CGdiApSX4tgGNmzDZePOpoXF21m3rqdtb/BBJ0VDVMrjzeP5IQYurep+4B0pv6iooTLRh3Ne78ZRYeUBK6evZRb3jj0AH85ewq58bVlnPzQfL78bjc3T/BPgnTR8V2JiwnNX/ebJ/Shd7sW3PzGCvL2N51OkJEqND9FplF5vD6GdGkZUn+hhqNe7ZJ4+9qRXDeuB28s3cIpD81n8Sb/XB279hVz57ur+dmMeby3YhtXZXZnwS3juG7cTydBCjUJsdE8eF46vsISbn97pfUWd1lof5pM0O0rLmPdjgIm9G/vdhSDf7iNmyf0ZVyfttz42nKmPLWIk/u3J2t9LsVlFUzJ6Mz143vRPszmb+/XMZmbft6Hv36wlreyt3L2MOtk6ha70jA1WpHjQxWGdrUnp5qSjG6teP/60ZyX0ZkPVm1nXN+2fHJDJn85a2DYFYxKV47uzvBurfjju6vJ2VPodpyIZUXD1CjbmwfAkCD3TjZ11yI+hr+ePYhVf5rAY0c4CVIoiY4SZkwZDMBNry2nPMTmJAkXVjRMjTxeHz3SmpOSGFv7xsYVkdThsnOrRO6c3J+vNu3hHwu+dTtORLKiYQ5LVfHk+Gz+DNOknD20E6cMaM+Mj9ex5vu9bseJOFY0zGFt3l3Inv0l1hPcNCkiwj1nDiQ1MY5pr3ooKrXe4o3JioY5LE+Ovz3DeoKbpqZV8zjuO2cQ63fs4/6P1rkdJ6JY0TCH5fH6aB4XTe92SW5HMeYnxvZpy0XHd2XW59+xcMMut+NEDCsa5rCyvXkM7pwatCk+jamv2yceQ/c2zbnp9eXkHyit/Q2m3qxomEM6UFLO19sK7NaUadKaxUUz87wh5BYU83/vrHI7TkSwomEOaeXWfMorlPTO1ghumrbBnVP57fhevLPse95d/r3bccKeFQ1zSJWd+uxKw4SCa8f2IL1LKn94eyXb8g+4HSesWdEwh+Tx5tG1dSKtW8S7HcWYWsVERzFzyhBKy5Xfvb6cCustHjRWNMxPqCrZXh/pNsWmCSHd2jTnjtP68cWG3Ty/cJPbccJWjUVDRNpWe50W3DimKdjqO0BuQbENUmhCzvnDOzO+b1v++uFavtlR4HacsHTYoiEipwF/qLb4RhE5PbiRjNs8zkx91ghuQo2I8NezB5EUH8P1ryyjpKzC7Uhhp6YrjeuB6dWW3QX8NnhxTFPg8fpIiI2ibwfr1GdCT1pSPH85ayBrtu3lwTnr3Y4TdmoqGlGqur/qAlUtBKynV5jL9uYxqFMqsdHW5GVC08/7t+e8jM48mbXx4OyG
pmHUdFZofpjl9lhCGCsuK2fN93vtUVsT8u6Y1I+jWiZyw6vLKCiy3uINpaai8bWIZFZdICLHA98FN5Jx0+rv91JSXmFFw4S8FvExzDxvMN/7DvDn/6xxO07YqKloTAf+LiLnikgnpwH8AX7azmHCyMFGcBsO3YSBYV1bce3Ynry+dAsfrtrudpywcNiioarbgQnAUcAdQE/gVFXNbaRsxgXZ3jw6pTajXXJ4zjNtIs9vx/diQKdkbn97JTsLityOE/JqbOlU1XxVnamq16jqDFXNa6xgxh3LvD6G2K0pE0biYqJ48Lwh7C8u49Y3VqBqzbL1UVM/jbdF5K1qX5+KyPDGDGgaz469RWz1HbCe4Cbs9GybxPRT+jJ3XS4vf+l1O05IO+yM9Kp6ZvVlItIeeAsYEcxQxh0eZ5BC6wluwtHFJ3Tj07U7uee9rxnRozXd01q4HSkk1elBfKedI6Bn10TkXhGZLyJZIjKi2roLRGSRiCwQkbuqLJ8hIvNEZKGInOIs6ycic53lj4tIbF0ym8B5vD7ioqPo3zHZ7SjGNLioKOH+cwcTFxPFDa8tp7TceosfiToVDefk7wtgu/FAoqpmApOBu6ud7C8CJgKZQJqIDBKRM4A9qjoWOBH4g4hEAfcDFzvLVwOX1SWzCZzH66Nfx2TiY6LdjmJMULRLTuDeMweyPMfHY3M3uB0nJB329pSIvM0PHfnE+X478OsA9jsemAX+xnQRyQIGAtnO+hlADrDbyTAdKAcWOO8pFJEtQCywX1VznPc9DfwTeCrAf58JUGl5BSu2+rhgeFe3oxgTVKcO6sCnX3fikc82MKZ3mj1eXkc1PXJ7pqqe5XydCVwIrAOeC2C/rYEdVV5vB9oCiEgi/iLRE+jm7LdcVVer6m5nm8nAIqAVsLNKplJqKHTmyK3dVkBRqXXqM5HhztP70z45gRtfW05hSZnbcUJKrbenRCRJRG7Hf2toP/6+G7XZDVQdRr09P5z8BwBzVXW7+p992wDcWuV444DRqvqgs582VdbF4r8iqZ7xKhFZIiJLcnOtG8mR8OTYTH0mciQnxDJjymA27d7PPe997XackFLTI7dtROQe4N/AJmCDqs5S1Z+ctA/hM2Cqs58U/G0XK511G4HhIlJ5xTAZKHS2zQQmAbcAqGoJ0FxEOjjbXgl8Uv1gqvq0qmaoakZamk35cSSyN+fRNimeTqnN3I5iTKM4vntrrhzdnZe/9DJ37c7a32CAmq807gTOBO5Q1X8CAV/DqeocoMJpy3gXf4/yDBG51rkF9QIwV0QWAKOAmSIyGvgPMMxZN09EjgZuA14WkXlAf+DZOv4bTQA8OT7Su6QiYoMYm8hx089707d9Eje/sYLd+4rdjhMSpKbekSLSBf9Juy+QrKoZjRXsSGVkZOiSJUvcjhFSdu8rZtjdc7jtlL5cM6aH23GMaVRrt+9l8iNfMLZPGk9dNCxi/3ASkaWBnONrG0bEq6rX4m+sni8inzsz+pkwUjlI4VB7isREoL7tk7l5Qh8+XrOD15ducTtOkxdQPw1V3aaqN+K/XXVCcCOZxubJySM6ShjYKcXtKMa44vJRR3N891b86d3V5OwpdDtOk1bXHuG5qvr7YIUx7vB4fRzTIYlmcdapz0SmqChhxpQhRIlww6vLKK+wQQ0Px+bzjHDlFcryHJ/dmjIRr1NqM/58Rn+WbM7jyayNbsdpsqxoRLj1OwrYX1Ju/TOMAc4Y0olTB3Vg5ifrWbU13+04TZIVjQh3cKa+znalYYyIcM8ZA2jdIo5pry6jqDSQbmmRxYpGhPN482jVPI6urRPdjmJMk5CaGMf95w5mw859/O3DtW7HaXKsaES4bG8e6Z2tU58xVY3ulcbUEd147otNTH9rBfmFAc0IERGsaESw/MJSNubut/YMYw5h+sS+XJXZndeWbGH8A1m8u/x7myoWKxoRbdkWpz3Dnpwy5ifiY6K5feIxvPvrkXRKTeC3//Iw9bnFEd+P
w4pGBMvenIcIDLY5wY05rP4dU3jr2pHcOakfSzbt4aSZWTyZtTFiZ/6zohHBPDk++rRLokW8TVFiTE2io4SpI49mzk1jyOyVxl8/WMukRz7H481zO1qjs6IRoSoqlGXePGvPMKYOOqQ04+mLM3jqomH4Cks564mF/N87qygoipyGcisaEerbXfvYW1Rm7RnGHIEJ/dsz56YxXHJCN2b/bzMnPpDFh6u2RURDuRWNCJV9cGRbu9Iw5ki0iI/hzsn9+fe1I2ndPJ5rXsrmyheXsNV3wO1oQWVFI0J5vD6SE2Lo3qaF21GMCWmDO6fy7q9H8vuJx/DFht2c9EAWz3z+XdgOemhFI0J5vHkM6dKSqCjr1GdMfcVER3FlZnc+viGT445uxV3/XcMZj30RluNXWdGIQPuKy1i3o4B0e9TWmAbVuVUiz049lkcvSGf73iImP/o5d/13DfuLA54tu8mzohGBVuT4UMWenDImCESE0wZ1ZM6NYzh/eBee+fw7TnogizlrdrgdrUFY0YhA2c6z5TayrTHBk9IslnvOHMibvzqBFgkxXPHiEn710lJ27C1yO1q9WNGIQB6vjx5pzUlJjHU7ijFhb1jXVvz3N6O5eUIfPlu7kxNnZDF70aaQbSi3ohFhVBVPjs/6ZxjTiOJiorhuXE8+mpbJ4M6p3PHOas5+YiFfb9vrdrQ6s6IRYbx7Ctmzv8TaM4xxQbc2zZl9+XBmnjcY755CJj3yOX/9YC0HSkJnsicrGhGmsj3D5gQ3xh0iwpnpR/HpjWM4a2gnnszayM8fzCJrfa7b0QJiRSPCeLw+EuOi6d0uye0oxkS0ls3j+Ps5g3nlquOJjY7ikme/4rf/8pBbUOx2tBpZ0YgwHq+PwUelEm2d+oxpEo7v3poPrh/N9eN78eGq7YyfMY9/feWlook2lFvRiCAHSsr5ettehna19gxjmpL4mGhuOKk3718/mmM6JDP9rZWc9/QivtlR4Ha0n7CiEUFWbs2nrEKtf4YxTVTPti145arj+fvZg1i/Yx8TH17AjI/XUVTadBrKrWhEkMoJY4bYk1PGNFkiwpRjO/PpTWM4dWAHHvlsA6c8tICFG3a5HQ2wohFRsr15dG2dSJsW8W5HMcbUok2LeB70T1P5AAAQ1ElEQVT8RTqzLx9OhSoXzPqSm15bzp79Ja7msqIRIVSVbK/PBik0JsSM7pXGR9MyuW5cD95ZtpXxM+bxxtItrk34ZEUjQnyfX0RuQbH1BDcmBCXERnPzhL6899vRHN2mOb97fTkXzvqS73btb/QsQSsaInKviMwXkSwRGVFt3QUiskhEFojIXVWWny4im0RkSLXt51X5OjVYmcNZ9mZnkEJrzzAmZPVpn8Qb14zg7jMGsHJrPhMenM8jn35DSVlFo2UIStEQkfFAoqpmApOBu0Wk6uh4FwETgUwgTUQGAajqO8Dz1fYVB2xT1bHO13vByBzuPF4f8TFRHNMh2e0oxph6iIoSfnl8Vz69cQwn9WvHjE/WM/HhBSzetKdxjh+k/Y4HZgGoaj6QBQyssn4GkANsAiY53x/OYGCYc5Xxp6CkjQCenDwGHZVCbLTdkTQmHLRNTuCxC4by3NRjOVBSzrlPLuKu/64J+nGDdQZpDVSdcWQ70BZARBKB6UBPoBtwIVDTQ8jrgP6qOhaIEpGzg5A3rBWXlbN6615rzzAmDI3r25ZPbszkqszudGudGPTjBato7AbSqrxuD+x0vh8AzFXV7epv/t8A3Hq4HanqXlUtdV6+AqRX30ZErhKRJSKyJDc3NAb9akyrv99LSXkFQ609w5iwlBgXw+0Tj+GiE7oF/VjBKhqfAVMBRCQFf9vFSmfdRmC4iMQ4rycDhYfaiYhEicgcEansWDAFWFp9O1V9WlUzVDUjLS2t+uqI5/H6AOxKwxhTbzG1b1J3qjpHRE4UkSxn0XQgQ0TSVfVxEXkBmCsi4G/PuOIw+6kQkUeAeSJSCnyhqm8HI3M4y/bm
0TElgXbJCW5HMcaEuKAUDQBVve0Qixc5694E3jzM++6s9vod4J2GzhdJlnl9pHe1qwxjTP3ZozRhbsfeIrb6DlhPcGNMg7CiEeasPcMY05CsaIQ5jzePuOgoBnSyTn3GmPqzohHmPF4f/TomEx8T7XYUY0wYsKIRxkrLK1ix1WfjTRljGowVjTC2dlsBRaUV1p5hjGkwVjTCmCfHP7Kt9QQ3xjQUKxphzOP1kZYUT6fUZm5HMcaECSsaYSzbm0d651ScnvfGGFNvVjTC1O59xWzeXchQ6wlujGlAVjTC1LIcp1Of9QQ3xjQgKxphKtubR3SUMPCoFLejGGPCiBWNMOXx+jimQxKJcUEbk9IYE4GsaISh8gpleY6P9M7WnmGMaVhWNMLQ+h0F7C8pt57gxpgGZ0UjDFWObDvUeoIbYxqYFY0w5PHm0TIxlq6NMMm8MSayWNEIQ9nePNK7tLROfcaYBmdFI8zkF5ayMXe/jTdljAkKKxphZtkWm6nPGBM8VjTCjMebhwgMsk59xpggsKIRZrK9Pnq3TSIpIdbtKMaYMGRFI4xUVCjLvHkM7WrtGcaY4LCiEUa+3bWfvUVl1hPcGBM0VjTCSLbXP1Of9QQ3xgSLFY0w4vH6SEqIoUdaC7ejGGPClBWNMOLx5jGkcypRUdapzxgTHFY0wsS+4jLW7yiw/hnGmKCyohEmVuT4qFCsJ7gxJqisaIQJjzO96xCb3tUYE0RWNMJE9uY8uqc1JzUxzu0oxpgwZkUjDKgqnhyfzZ9hjAm6oBUNEblXROaLSJaIjKi27gIRWSQiC0TkrirLTxeRTSIypMqyfiIyV0TmicjjImLjY1Tj3VPInv0l1j/DGBN0QSkaIjIeSFTVTGAycHe1k/1FwEQgE0gTkUEAqvoO8Hy13d0PXKyqY4HVwGXByBzKDnbqs57gxpggC9aVxnhgFoCq5gNZwMAq62cAOcAmYJLz/U+ISBywX1Ur1z8NnBicyKHL4/WRGBdN73bWqc8YE1wxQdpva2BHldfbgbYAIpIITAd6OtuMAcpr2M/OyheqWioiP8ksIlcBVwF06dKlAeKHFo/Xx+CjUomJtiYqY0xwBesssxtIq/K6PT+c/AcAc1V1u6oqsAG4tYb9tKl84dzi+kmBUdWnVTVDVTPS0tKqrw5rB0rK+XrbXmvPMMY0imAVjc+AqQAikoK/7WKls24jMLzKFcNkoPBQO1HVEqC5iHRwFl0JfBKkzCFp5dZ8yirUeoIbYxpFUG5PqeocETlRRLKcRdOBDBFJV9XHReQFYK6IgL8944oadncb8LKIROFvCJ8WjMyhymMj2xpjGlGw2jRQ1dsOsXiRs+5N4M3DvO/Oaq9XAT9r6HzhwuP10aVVIm1axLsdxRgTAazlNISpKtnePLvKMMY0mqBdaYSaTbv2c81LS+nRtgU901rQs63/6+g2zUmIjXY73iF9n1/EzoJi6wlujGk0VjQcpeUVdEptxsot+by/chuq/uVRAp1bJR4sJD3a/lBQkhPc7Zxu7RnGmMZmRcPRq10Sz0w9FoCi0nK+zd3Phtx9bNi5j407/f9d8M0uSsorDr6nbVL8wQLSs8oVSlpSPE4jf1Blb/YRHxNF3/bJQT+WMcaAFY1DSoiNpl/HZPp1/PHJuKy8gpy8A2xwisiGnfvYkLuPt7K3sq+47OB2SQkxPyoilV9HtUwkugFn1fPk5DHoqBTiYqxpyhjTOKxo1EFMdBRHt2nO0W2ac1K/dgeXqyo79hY7haTg4BXK3HW5vL50y8Ht4mP87//R1YnTbhIfU7d2k+KyclZv3cvUkd0a6p9njDG1sqLRAESE9ikJtE9JYFSvNj9al19Yyobcgh9dnSzf4uO9au0mXVol/tBmUuUKJekw7Sarv99LSXkF6TbpkjGmEVnRCLKUxFiGdW3FsK6tfrT8QEk53+6q0mbiXJ1krc+ltFwPbtcuOf5Ht7oqG+KzN1c2gtuTU8aYxmNFwyXN4qLp
3zGF/h1TfrS8rLwC757Cg+0llUXljaVb2F/yw7BbUQIdnasbY4xpLFY0mpiY6Ci6p7Wge1oLfl5luaqyfW/Rj25zZXSzqwxjTOOyohEiRIQOKc3okNKM0b0iayRfY0zTYc9qGmOMCZgVDWOMMQGzomGMMSZgVjSMMcYEzIqGMcaYgFnRMMYYEzArGsYYYwJmRcMYY0zARFVr3yqEiEgusLkeu2gD7GqgOMEWSlkhtPJa1uAJpbyhlBXql7erqtbaczjsikZ9icgSVc1wO0cgQikrhFZeyxo8oZQ3lLJC4+S121PGGGMCZkXDGGNMwKxo/NTTbgeog1DKCqGV17IGTyjlDaWs0Ah5rU3DGGNMwOxKw5gwIyKnichlbucw4Slii4aI3Csi80UkS0RGVFt3krN8gYg8IiLiVk4nT01ZrxKRL0XkfyJyl1sZq6opb5VtokXkb42d7RA5aswqIl+LyDzna5gbGavlqS1vEnAd8HLjpwtttfyeXSAii5xzguu/Z7VkHeMsXyQi1zX4wVU14r6A8cCDzvcpwGdArPM6GpgHJDiv/wSc0RSzOssuwF/8Y4BPgOim+rOttt3fgUVNOSvQDXjAzYx1/dkCDwLD3M7qZLkXmA9kASOqrXvL+T2bB3iBS5vyzxb4AGgJCPAkMKgpZnXyfQokOueFt4DuDXn8SL3SGA/MAlDVfPwf6oHOuljgZlUtcl6vBuIbPeEPasqKqv4TOAHYDqxW1fJD7aQR1ZgXQEQuATYAOxo93Y/VlvVYYLzzV9u1LuSrrsa8InIccCEwU0R+5UrCH7KMBxJVNROYDNwtIrGV61X1LFUdi//f5AFedCXoD2r7LMwAcoBNwCTne7fUlLUNsFFVC1W1An8B6daQB4/UotGaH5+wtgNtAVS1SFUXA4hIK+AM4O1GT/iDw2atpKpfAJ2BliIyuhGzHUqNeZ0TW3dVbQpPpdT2s/0USAd+BhwrIm538qot783AKGAckCkigxsxW3W1/vHgmAr8swn8sXPYn62IJALTgZ74T8AXAm7mrelzsAvoJyJtRaQZ/oK9rCEPHqlFYzdQtbt8e2Bn1Q1EJBl4BLhJVUsaMVt1h80qIqki0hVAVQ8A/wCOb/SEP1ZT3hRgJjBWROYBo0Tk7kZP+IMaPwequkdVK5wT2hvAkEbOV11tn9soVV3n5H0JGN6Y4aqp9Y8dEYkHTgdea8Rch1PTz3YAMFdVt6v/HtAG4NZGzlfVYbM6+W7B/zNdCixT1T0NefBILRqf4f8Lp/JElgmsrFzpNCY+DtymqtvcCFhFjVmBJ0Uk2vn+TOCrRk33U4fNq6r5qjpCVcc4tyY+V9U/uBWUGrKKSCsReVccwNn4b6O4qbbPQrKIVJ6Yfw6satR0P1brH2bANcA/nBOd22r62W4EhotIjPN6MlDY2AGrqPFzoKoL8f//3wj8uaEPHpFFQ1XnABUikgW8C9wBZIjItSLSAn+j11BgtvPUjGv3h2vKqqo+4HlgvrM+V1Wz3MoKNed1M9eh1PKz3QO8BywCFuD/i22pe2kD+tneBrwpIvOBHaq6yKWoUPsfZs2Bn6nqf1xJV00tn4XdwAvAXBFZgP8W4MymmLXKZn8EHlXV/Q19fOvcZ4wJChH5K/6HNMDfJqBAuqo+LiK/B/6nqp+6FjBMiUh74M+qelVQ9m9FwxjT2ESkv6qudjuHqTsrGsYYYwIWkW0axhhjjowVDWOMMQGzomGMMSZgVjRMxBGRqSJyRigcU0T+3RjvMSZQVjSMMcYELKb2TYxp+pzhVO7DP3xFIv7RUx8EHsY/fEUM8LSqzq7ynuOAx4B84EpV/VZEbsY/3lgp8CUwRFUniMidQAkwAUgFrgdOA44DDuDvJdweuBP/SMld8I84OkNV36mW9RzgHPyDy23HP5Judg3/tt84mWLx9/i/FTgLSFLVZ51tbgEWAzcBx4vII/h7ZXcBKlT1isMdV0Q6OO/r52T+HPiLM+CdMT9ij9ya
sCAi7wO/VtVvndcTgSuBe1R1iTPC6jP4i8QxgA9oB+zB36u2FDgX//hSt6uqisgJwFuq2sEpGq1U9bcikgasAE5W1eUicjX+E/QSYA4wSlW3i0gC/tFb78Q/DpQP/3hMTzjfV9qnqqcd5t81F3hCVV9zXl+Fv2jdD7yDv1jFAG+q6mRnm3+r6hmVhU5V73X+LYc8roi86vwsKrUA/qCqHwbwozcRxq40TMhzRiPeXlkwAFT1fRG5WlWXOK9LReQZYCT+QgH+AR4vwz9p0aP4h4e4r3IsJFVdJCJfVznUv5zluSKyWlWXO8vX88MIru+r6nZnuyIReRn/IJKVf7WPwD+m2cETctUhww+hsLJgOJ4BXlPVCqdQTgaSgVcP8/65ARy3tTMW2MHlqlpaQyYTwaxNw4Q8Z5yoo0WkZ+UyETkV//g8/ZzXMcDFwP+qvDVZVWfhv1XzC/xjTF3tDFCIiIzEf1VS6UCV7/dVjcAPv0sTRaSj8/54/JNkVT3ml8CNzjpE5ESgppngmonIhVVeX45/PCyA5/CP73Quhy8alZlrOm6F8xrn6ui1KgMfGvMjdqVhwsWVwAwRScV/Av8YuBZ4SETa4G8PeFZVF4pIb+c9J4vINUACcJ6qbhaRLvgHgCzDX0RW1DFHFvBnp4BF42/TWCMiwwFU9XMReQP/4HdlwLdATQNi7gVaiMhn+H9fFwG3O/sqEpHVgE9Vy6q8p62ITK26k1qOexnwiIj8wTnGXapafURaYwBr0zCmwYhIN2Caqk5rxGO+Dlyuqnsb65gmstmVhjFNgHNlMLXa4udV9fka3jMI2GwFwzQmu9IwxhgTMGsIN8YYEzArGsYYYwJmRcMYY0zArGgYY4wJmBUNY4wxAbOiYYwxJmD/DwLpbBPst4OGAAAAAElFTkSuQmCC\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Plot the cross-validated AUC for each colsample_bytree candidate.\n",
    "# Only the held-out (test) statistics are read: the train-score entries of\n",
    "# cv_results_ are present only when the search ran with train scoring enabled,\n",
    "# and they were never used by this plot.\n",
    "test_means = grid_search.cv_results_['mean_test_score']\n",
    "test_stds = grid_search.cv_results_['std_test_score']\n",
    "\n",
    "x_axis = colsample_bytree_s\n",
    "\n",
    "fig, ax = plt.subplots()\n",
    "ax.plot(x_axis, test_means, marker='o', label='mean CV AUC')\n",
    "# Shade +/- one standard deviation across folds so the spread is visible.\n",
    "ax.fill_between(x_axis, test_means - test_stds, test_means + test_stds, alpha=0.2, label='+/- 1 std')\n",
    "ax.set_xlabel('colsample_bytree')\n",
    "ax.set_ylabel('AUC')\n",
    "ax.set_title('CV AUC vs. colsample_bytree')\n",
    "ax.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### （7）再次调参n_estimators\n",
    "\n",
    "    此时，eta=0.1⬇️0.01、n_estimators=42、max_depth=6、num_leaves=70、min_child_samples=40、subsample=0.7、colsample_bytree=0.4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/apple/.local/lib/python3.7/site-packages/lightgbm/basic.py:755: UserWarning: categorical_feature keyword has been found in `params` and will be ignored.\n",
      "Please use categorical_feature argument of the Dataset constructor to pass this parameter.\n",
      "  .format(key))\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best n_estimators: 550\n",
      "best cv score: 0.8470906571012028\n"
     ]
    }
   ],
   "source": [
    "# Step 1: fix every hyper-parameter tuned so far, with learning_rate at 0.01.\n",
    "# n_estimators is left out on purpose: it is re-estimated in step 2.\n",
    "# NOTE(review): the stored stderr of this cell shows LightGBM warning that\n",
    "# categorical_feature inside `params` is ignored when the Dataset is built --\n",
    "# confirm whether get_n_estimators forwards it to the Dataset constructor.\n",
    "params = dict(\n",
    "    boosting_type='goss',\n",
    "    objective='binary',\n",
    "    is_unbalance=True,\n",
    "    categorical_feature=[0, 1, 3, 5, 6, 12, 15, 16, 17, 18, 19, 20],\n",
    "    n_jobs=4,\n",
    "    learning_rate=0.01,\n",
    "    num_leaves=70,\n",
    "    max_depth=6,\n",
    "    min_child_samples=40,\n",
    "    subsample=0.7,\n",
    "    colsample_bytree=0.4,\n",
    ")\n",
    "\n",
    "# Step 2: use cross-validation to find the best number of boosting rounds.\n",
    "n_estimators_2 = get_n_estimators(params, X_train, y_train, early_stopping_rounds=50)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3、训练模型（refit）\n",
    "\n",
    "    此时，eta=0.01、n_estimators=550、max_depth=6、num_leaves=70⬆️75、min_child_samples=40、subsample=0.7、colsample_bytree=0.4"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/apple/.local/lib/python3.7/site-packages/lightgbm/basic.py:755: UserWarning: categorical_feature keyword has been found in `params` and will be ignored.\n",
      "Please use categorical_feature argument of the Dataset constructor to pass this parameter.\n",
      "  .format(key))\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "LGBMClassifier(boosting_type='goss',\n",
       "        categorical_feature=[0, 1, 3, 5, 6, 12, 15, 16, 17, 18, 19, 20],\n",
       "        class_weight=None, colsample_bytree=0.4, importance_type='split',\n",
       "        is_unbalance=True, learning_rate=0.01, max_depth=6,\n",
       "        min_child_samples=40, min_child_weight=0.001, min_split_gain=0.0,\n",
       "        n_estimators=550, n_jobs=4, num_leaves=75, objective='binary',\n",
       "        random_state=None, reg_alpha=0.0, reg_lambda=0.0, silent=False,\n",
       "        subsample=0.7, subsample_for_bin=200000, subsample_freq=0)"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Column indices of the categorical features.\n",
    "# LightGBM ignores `categorical_feature` when it arrives through the estimator's\n",
    "# params (it only emits a UserWarning), so the list is passed to fit() instead.\n",
    "# NOTE(review): assumes these columns hold non-negative integer codes -- confirm\n",
    "# against the preprocessing earlier in the notebook.\n",
    "categorical_features = [0, 1, 3, 5, 6, 12, 15, 16, 17, 18, 19, 20]\n",
    "\n",
    "# Final hyper-parameters for the refit on the full training set.\n",
    "params = {\n",
    "    'boosting_type': 'goss',\n",
    "    'objective': 'binary',\n",
    "    'is_unbalance': True,\n",
    "    'n_jobs': 4,\n",
    "    'learning_rate': 0.01,\n",
    "    'n_estimators': n_estimators_2,  # boosting rounds returned by get_n_estimators\n",
    "    'num_leaves': 75,\n",
    "    'max_depth': 6,\n",
    "    'min_child_samples': 40,\n",
    "    'subsample': 0.7,\n",
    "    'colsample_bytree': 0.4,\n",
    "}\n",
    "\n",
    "lg = LGBMClassifier(silent=False, **params)\n",
    "lg.fit(X_train, y_train, categorical_feature=categorical_features)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**特征重要性图**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX4AAAD7CAYAAABt0P8jAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAD+5JREFUeJzt3W+MXNV9xvHvY2GITAOJzFITtYkrVa1qBARl8wbVxgGUNKAkSFRUVgOiKFhpEKpftGDyR03ekEhgKW5UlDpUREqxUCLFEET6AojXcaAoWihJaqoUqaUyUYwXUzkVBLGxf32xl3a6GdjZ2T/j3fP9SCvvnHvu3HMvwzNnf3funVQVkqR2rBn1ACRJy8vgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXmtFEPoJ9zzjmnNm7cOOphSNKK8dRTT71UVWOD9D0lg3/jxo1MTk6OehiStGIk+c9B+1rqkaTGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4JekxpySV+5K+j8bdz48cN/nv3TVEo5Eq4UzfklqjMEvSY0x+CWpMQa/JDVmzuBPsi7J/UmeSPJ4kj9MsinJ/iQTSe5Osrbre0uSHyQ5mOTqru2sJN9MciDJd5JsWOqdkiS9uUE+1bMB+JuqeiLJJuAW4D3A9VV1OMnNwI1JHgO2ApuBtcBDSQ4CO4C9VfVAkouAO4HrlmBfJEkDmHPGX1X/3oX+XcAk8HXglao63HXZA1wBXAbcWzNeB/YClwIXVtUD3XP9CDhr8XdDkjSogWv8VfWXwEXAl4CXetqnmfnLYT3wYs8qR4BzgZOznurVJG+b/fxJtieZTDI5NTU1+B5IkuZlkBr/HyQ5HaCqngP+A/i9nuVrgRPAMaD3+x43AEf7POWZVfXa7Maq2lNV41U1PjY20NdGSpKGMMiMfwtwPUCSdwC/DfwyyXnd8puAR4D9dLX77o1iGzABHEpyZdd+IXB8EccvSZqnQU7u/j3wtSR/ykzZ5jbgNeC+JGuAQ8COqppO8mR3QhdgV1W9nOTObv3bmAn97Yu/G5KkQc0Z/FX1K+DP+iy6rE/f3cDuWW3HgWuHHaAkaXF5AZckNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNWag4E+yK8lEkieSfLhrm+j5uapruyPJ95McSHJJ1/auJN/t2vYmefvS7Y4kaS6nzdUhydXAy1W1Nck64JEkjwE/r6ptPf0uB9ZV1ZYkZwP7knwI+CLw2ap6unuD+DRw+5LsjSRpToPM+J8DvgpQVa8CLwAXA+/rZvtf6PpdDtzT9TsOHAAuAN5ZVU937Q8DmxZ1DyRJ8zJn8FfVoao6BpDko8A/Af8KnF9VW4E1Sa4B1gMv9qx6BDgXODHrKU/2206S7Ukmk0xOTU3Ne0ckSYMZ+ORukg8Am6vqy1X1i6qa7hbdz8xfAMeAsZ5VNgBHgQyyzaraU1XjVTU+NjbWr4skaREMenJ3C/AR4NYka5I8muSMbvG1wFPA94Abuv5nA1uAnwDHk1zQtV8JPLuoeyBJmpdBTu5uBh4CngH2d837gIkk08DjVbWv63tFkgNdn9urajrJZ4A93YnhnwGfXOydkCQNbs7gr6qDwNl9Fu3u03dnn7YXgCuHGp0kadF5AZckNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjTH4JakxBr8kNWag4E+yK8lEkieSfDjJ
piT7u7a7k6zt+t2S5AdJDia5ums7K8k3kxxI8p0kG5ZyhyRJb+20uTp0Af5yVW1Nsg54BDgOXF9Vh5PcDNyY5DFgK7AZWAs8lOQgsAPYW1UPJLkIuBO4bml2R5I0lzmDH3gOOAhQVa8mOQr8qqoOd8v3AHuBAu6tqgJeT7IXuBS4sKo+163/oyRnLfZOSJIGN2epp6oOVdUxgCQfBX4IHO1ZPs3MG8h64MWeVY8A5wInZz3lq0netsBxS5KGNPDJ3SQfYKaMsws4p6d9LXACOAaM9ayygZ43iB5nVtVrfZ5/e5LJJJNTU1ODDkuSNE+DntzdAnwEuLWqXgfOTHJet/gmZur+++lq90lOB7YBE8ChJFd27Rcyc37g11TVnqoar6rxsbGxfl0kSYtgkJO7m4GHgGeA/UkAbgHuS7IGOATsqKrpJE92J3QBdlXVy0nuBL6W5DZmQn/7UuyI2rVx58Pz6v/8l65aopFIK8OcwV9VB4Gz+yy6rE/f3cDuWW3HgWuHHaAkaXF5AZckNcbgl6TGGPyS1BiDX5IaY/BLUmMMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktSYQb6IRZI0pPncRHC5biDojF+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSY7xlwwpzKl7+LWllccYvSY0x+CWpMQMFf5KPJXk+yXt72iZ6fq7q2u5I8v0kB5Jc0rW9K8l3u7a9Sd6+NLsiSRrEQDX+qnowycVvPE5yOvDzqtrW03Y5sK6qtiQ5G9iX5EPAF4HPVtXT3RvEp4HbF3UvJEkDG7bUcxHwvm62/4Wu7XLgHoCqOg4cAC4A3llVT3ftDwObFjZkSdJCDBv8PwXOr6qtwJok1wDrgRd7+hwBzgVOzFr35JDblCQtgqGCv6p+UVXT3cP7gYuBY8BYT7cNwFEgg2wzyfYkk0kmp6amhhmWJGkA8w7+JGuSPJrkjK7pWuAp4HvADV2fs4EtwE+A40ku6NqvBJ7t97xVtaeqxqtqfGxsrF8XSdIimPcFXFV1MslXgIkk08DjVbUPIMkVSQ50XW+vqukknwH2JFkH/Az45GINXpI0fwMHf1V9vuf3B4EH+/TZ2aftBeDKIcenFcorjKVTlxdwSVJjDH5JaozBL0mN8e6cC2QtW9JKY/DrlDGfN1HwjVQa1qoLfmfgkvTWrPFLUmNW3Yxf0gz/+tWbccYvSY0x+CWpMZZ6JP0/lohWP2f8ktQYg1+SGmPwS1JjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSY7xXjzRPflOYVjpn/JLUGINfkhozUKknyceA3cDVVfVMkk3A3wIBngX+oqqmk9wC/AlQwK6qeiDJWcA9wG8Cx4HtVXVkCfZFmhdLNmrVQDP+qnoQ+HpP013A9VW1FTgE3Jjkd4GtwGbgcuDPk6wH/grYW1WXAp8D7lyswUuS5m/epZ4kpwOvVNXhrmkPcAVwGXBvzXgd2AtcClxYVQ8AVNWPgLMWZeSSpKEMU+NfDxx940FVTTNTMloPvNjT7whwLnBy1vqvJnnbENuVJC2CYT7OeQw4540HSdYCJ7r2sZ5+G+h5g+hxZlW9NrsxyXZgO8C73/3uIYYlSUtnNX0l5bxn/F0Z58wk53VNNwGPAPuB6+B/y0HbgAngUJIru/YLmTnB2+9591TVeFWNj42N9esiSVoEw17AtRO4L8kaZk7u7ug+1fNkkoNdn11V9XKSO4GvJbmN7lM9Cx+2JGlYAwd/VX2+5/d/YeZk7uw+u5n52Gdv23Hg2uGHKElaTF7AJUmNMfglqTEGvyQ1xuCXpMYY/JLUGO/HPyKr6WIQSSuLM35JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGINfkhpj8EtSYwx+SWqMwS9JjfGWDZIWhbchWTmc8UtSYwx+SWqMwS9JjbHGL2mkPDew/JzxS1JjDH5JaozB
L0mNMfglqTFDB3+Sq5IcSjKR5FtJNiXZ3z2+O8nart8tSX6Q5GCSqxdv6JKkYSzkUz3vB7ZV1Y8BknwXuL6qDie5GbgxyWPAVmAzsBZ4KMnBqjq2wHFLkoa0kOAfBy5JcgLYCbxSVYe7ZXuAvUAB91ZVAa8n2QtcCnx7AduVJC3AQoL/hqp6KclvAfcC//bGgqqaTnIasB745551jgC/s4BtStKCeN3AAmr8VfVS9+8LwH8DG95Y1tX3TwDHgLGe1TYAR/s9X5LtSSaTTE5NTQ07LEnSHIYK/iSfSvLx7vcNwG8AZyQ5r+tyE/AIsB+4rut3OrANmOj3nFW1p6rGq2p8bGysXxdJ0iIYttRzL/CNJJ9gZma/g5l6/n1J1gCHgB1dyefJJAe79XZV1csLHrUkaWhDBX9V/RL44z6LLuvTdzewe5jtSJIWnxdwSVJjDH5JaozBL0mNMfglqTEGvyQ1xuCXpMYY/JLUGL9zt+P9O/rzuEirjzN+SWqMwS9JjbHU0whLNpLe4Ixfkhpj8EtSYwx+SWqMNX5JK5LnrYbnjF+SGmPwS1JjDH5Jaow1fmmZzKcmDdaltXSc8UtSYwx+SWqMwS9JjTH4JakxBr8kNcbgl6TGLEvwJ7kjyfeTHEhyyXJsU5LU35J/jj/J5cC6qtqS5GxgX5IPVdX0Um9bkvTrlmPGfzlwD0BVHQcOABcsw3YlSX0sR/CvB17seXwEOHcZtitJ6iNVtbQbSO4A/qGqnu0e/zXwUFU9PavfdmB79/D3gZ8u4jDOAV5axOdbLTwu/Xlc+vO49HeqHJf3VNXYIB2XI/ivAD5YVbd2Nf5vA3+0nDX+JJNVNb5c21spPC79eVz687j0txKPy5Kf3K2qR5NckeRA13S7J3YlaXSW5e6cVbVzObYjSZpbKxdw7Rn1AE5RHpf+PC79eVz6W3HHZclr/JKkU0srM35JUmfVB7+3i+gvyVVJDiWZSPKtUY9nlJJ8LMnzSd7bPd6UZH93bO5OsnbUYxyF2cela5vo+WnuK8KS7Or2/YkkH16pr5VV/dWL3i7iLb0f2FZVPx71QEatqh5McnFP013A9VV1OMnNwI3A341mdKMz+7gkOR34eVVtG+GwRibJ1cDLVbU1yTrgEeA4K/C1stpn/N4u4s2NA3cl+cck5496MKeKLtxeqarDXdMe4IoRDulUchHwvm52+4VRD2YEngO+ClBVrwJHWaGvldUe/N4u4s3dUFUfBG4CvjzqwZxC1jPzPzQA3V+Hq/ov43n4KXB+VW0F1iS5ZsTjWVZVdaiqjgEk+SjwQ1boa2W1B/8xoPcS5g30/IdqWVW91P37AvBfK6U2uQyOMXMJPgDdcTkxuuGcOqrqFz1l0vuBi9+q/2qV5APAZmAXK/S1stqD/3vADQBdjX8L8JNRDuhUkORTST7e/b4BeIfnPWZU1evAmUnO65puYqaW27Qka5I8muSMrula4KlRjmkUkmwBPgLcupJfKyviz5JhebuIN3Uv8I0kn2BmhrJjxOM51ewE7kuyBjiEx4eqOpnkK8BEkmng8araN+pxLackm4GHgGeA/UkAbmEFvla8gEuSGrPaSz2SpFkMfklqjMEvSY0x+CWpMQa/JDXG4Jekxhj8ktQYg1+SGvM/Gr9246idGuQAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Rank the features by the fitted model's importances so the chart matches the table shown next.\n",
    "df = pd.DataFrame({'columns': list(feat_names), 'importance': list(lg.feature_importances_)})\n",
    "df = df.sort_values(by=['importance'], ascending=False)\n",
    "\n",
    "# Horizontal bars in ranked order, labelled with the feature names.\n",
    "positions = list(range(len(df)))\n",
    "fig, ax = plt.subplots(figsize=(8, 6))\n",
    "ax.barh(positions, df['importance'].values)\n",
    "ax.set_yticks(positions)\n",
    "ax.set_yticklabels(df['columns'].values)\n",
    "ax.invert_yaxis()  # largest importance at the top\n",
    "ax.set_xlabel('importance')\n",
    "ax.set_title('LightGBM feature importance')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>columns</th>\n",
       "      <th>importance</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>Monthly_Income</td>\n",
       "      <td>3131</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Loan_Amount_Submitted</td>\n",
       "      <td>1960</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>Age</td>\n",
       "      <td>1761</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Existing_EMI</td>\n",
       "      <td>1604</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Interest_Rate</td>\n",
       "      <td>1443</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>EMI_Loan_Submitted</td>\n",
       "      <td>1382</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>Loan_Amount_Applied</td>\n",
       "      <td>1370</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>City</td>\n",
       "      <td>1356</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>Processing_Fee</td>\n",
       "      <td>1327</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>Salary_Account</td>\n",
       "      <td>1275</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>Loan_Tenure_Submitted</td>\n",
       "      <td>1094</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>Loan_Tenure_Applied</td>\n",
       "      <td>1052</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>Var5</td>\n",
       "      <td>993</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Employer_Name</td>\n",
       "      <td>861</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>Source</td>\n",
       "      <td>697</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>Var1</td>\n",
       "      <td>631</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>Var4</td>\n",
       "      <td>504</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Device_Type</td>\n",
       "      <td>337</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Gender</td>\n",
       "      <td>323</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Filled_Form</td>\n",
       "      <td>285</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>Var2</td>\n",
       "      <td>230</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>Mobile_Verified</td>\n",
       "      <td>163</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  columns  importance\n",
       "13         Monthly_Income        3131\n",
       "9   Loan_Amount_Submitted        1960\n",
       "21                    Age        1761\n",
       "4            Existing_EMI        1604\n",
       "7           Interest_Rate        1443\n",
       "2      EMI_Loan_Submitted        1382\n",
       "8     Loan_Amount_Applied        1370\n",
       "0                    City        1356\n",
       "14         Processing_Fee        1327\n",
       "15         Salary_Account        1275\n",
       "11  Loan_Tenure_Submitted        1094\n",
       "10    Loan_Tenure_Applied        1052\n",
       "20                   Var5         993\n",
       "3           Employer_Name         861\n",
       "16                 Source         697\n",
       "17                   Var1         631\n",
       "19                   Var4         504\n",
       "1             Device_Type         337\n",
       "6                  Gender         323\n",
       "5             Filled_Form         285\n",
       "18                   Var2         230\n",
       "12        Mobile_Verified         163"
      ]
     },
     "execution_count": 37,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4、保存模型\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pickle\n",
    "\n",
    "MODEL_PATH = \"HappyBank/model/HappyBank_LightGBM_.pkl\"\n",
    "\n",
    "# Create the target folder if it is missing, and write through a context manager\n",
    "# so the file is flushed and closed as soon as the model has been serialized.\n",
    "os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)\n",
    "with open(MODEL_PATH, 'wb') as model_file:\n",
    "    pickle.dump(lg, model_file)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
